parser

package
v0.10.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 5, 2026 License: MIT Imports: 18 Imported by: 0

Documentation

Overview

Parses Claude Code JSONL session files into structured session data. Detects DAG forks in uuid/parentUuid trees and splits large-gap forks into separate sessions.

Parses OpenClaw JSONL session files into structured session data. Handles OpenClaw's wrapped message format with toolResult role.

Index

Constants

This section is empty.

Variables

View Source
// Registry lists all supported agents. Order is stable and used for
// iteration in config, sync, and watcher setup.
//
// Each entry is an AgentDef: DefaultDirs are relative to $HOME, IDPrefix
// is the session ID prefix, and DiscoverFunc/FindSourceFunc are set only
// for file-based agents.
var Registry = []AgentDef{
	{
		Type:           AgentClaude,
		DisplayName:    "Claude Code",
		EnvVar:         "CLAUDE_PROJECTS_DIR",
		ConfigKey:      "claude_project_dirs",
		DefaultDirs:    []string{".claude/projects"},
		// Claude is the only agent with an empty session ID prefix.
		IDPrefix:       "",
		FileBased:      true,
		DiscoverFunc:   DiscoverClaudeProjects,
		FindSourceFunc: FindClaudeSourceFile,
	},
	{
		Type:           AgentCodex,
		DisplayName:    "Codex",
		EnvVar:         "CODEX_SESSIONS_DIR",
		ConfigKey:      "codex_sessions_dirs",
		DefaultDirs:    []string{".codex/sessions"},
		IDPrefix:       "codex:",
		FileBased:      true,
		DiscoverFunc:   DiscoverCodexSessions,
		FindSourceFunc: FindCodexSourceFile,
	},
	{
		Type:           AgentCopilot,
		DisplayName:    "Copilot",
		EnvVar:         "COPILOT_DIR",
		ConfigKey:      "copilot_dirs",
		DefaultDirs:    []string{".copilot"},
		IDPrefix:       "copilot:",
		// Only the session-state subdirectory is watched, not the root.
		WatchSubdirs:   []string{"session-state"},
		FileBased:      true,
		DiscoverFunc:   DiscoverCopilotSessions,
		FindSourceFunc: FindCopilotSourceFile,
	},
	{
		Type:           AgentGemini,
		DisplayName:    "Gemini",
		EnvVar:         "GEMINI_DIR",
		ConfigKey:      "gemini_dirs",
		DefaultDirs:    []string{".gemini"},
		IDPrefix:       "gemini:",
		// Only the tmp subdirectory is watched, not the root.
		WatchSubdirs:   []string{"tmp"},
		FileBased:      true,
		DiscoverFunc:   DiscoverGeminiSessions,
		FindSourceFunc: FindGeminiSourceFile,
	},
	{
		// OpenCode is DB-backed (FileBased is false), so DiscoverFunc and
		// FindSourceFunc are left nil.
		Type:        AgentOpenCode,
		DisplayName: "OpenCode",
		EnvVar:      "OPENCODE_DIR",
		ConfigKey:   "opencode_dirs",
		DefaultDirs: []string{".local/share/opencode"},
		IDPrefix:    "opencode:",
		FileBased:   false,
	},
	{
		// NOTE(review): no ConfigKey is set here, unlike most entries; per
		// AgentDef an empty key means no config.json entry. Confirm that
		// this is intentional.
		Type:           AgentCursor,
		DisplayName:    "Cursor",
		EnvVar:         "CURSOR_PROJECTS_DIR",
		DefaultDirs:    []string{".cursor/projects"},
		IDPrefix:       "cursor:",
		FileBased:      true,
		DiscoverFunc:   DiscoverCursorSessions,
		FindSourceFunc: FindCursorSourceFile,
	},
	{
		// NOTE(review): no ConfigKey is set here, unlike most entries; per
		// AgentDef an empty key means no config.json entry. Confirm that
		// this is intentional.
		Type:           AgentAmp,
		DisplayName:    "Amp",
		EnvVar:         "AMP_DIR",
		DefaultDirs:    []string{".local/share/amp/threads"},
		IDPrefix:       "amp:",
		FileBased:      true,
		DiscoverFunc:   DiscoverAmpSessions,
		FindSourceFunc: FindAmpSourceFile,
	},
	{
		Type:        AgentVSCodeCopilot,
		DisplayName: "VSCode Copilot",
		EnvVar:      "VSCODE_COPILOT_DIR",
		ConfigKey:   "vscode_copilot_dirs",
		DefaultDirs: []string{

			// Presumably the Windows locations (AppData/Roaming layout);
			// TODO confirm.
			"AppData/Roaming/Code/User",
			"AppData/Roaming/Code - Insiders/User",
			"AppData/Roaming/VSCodium/User",

			// macOS locations.
			"Library/Application Support/Code/User",
			"Library/Application Support/Code - Insiders/User",
			"Library/Application Support/VSCodium/User",

			// Linux locations.
			".config/Code/User",
			".config/Code - Insiders/User",
			".config/VSCodium/User",
		},
		IDPrefix: "vscode-copilot:",
		// Both storage subdirectories are watched instead of the User root.
		WatchSubdirs: []string{
			"workspaceStorage",
			"globalStorage",
		},
		FileBased:      true,
		DiscoverFunc:   DiscoverVSCodeCopilotSessions,
		FindSourceFunc: FindVSCodeCopilotSourceFile,
	},
	{
		Type:           AgentOpenClaw,
		DisplayName:    "OpenClaw",
		EnvVar:         "OPENCLAW_DIR",
		ConfigKey:      "openclaw_dirs",
		DefaultDirs:    []string{".openclaw/agents"},
		IDPrefix:       "openclaw:",
		FileBased:      true,
		DiscoverFunc:   DiscoverOpenClawSessions,
		FindSourceFunc: FindOpenClawSourceFile,
	},
}

Registry lists all supported agents. Order is stable and used for iteration in config, sync, and watcher setup.

Functions

func AmpThreadID added in v0.10.0

func AmpThreadID(data []byte) string

AmpThreadID extracts the id field from raw Amp thread JSON data without fully parsing.

func BuildGeminiProjectMap added in v0.10.0

func BuildGeminiProjectMap(
	geminiDir string,
) map[string]string

BuildGeminiProjectMap reads Gemini config files (~/.gemini/projects.json and ~/.gemini/trustedFolders.json) and returns a map from directory name to resolved project name.

func CursorSessionID added in v0.9.0

func CursorSessionID(path string) string

CursorSessionID derives a session ID from a transcript file path by stripping whatever extension is present.

func DecodeContent added in v0.10.0

func DecodeContent(raw string) string

DecodeContent extracts the text from a raw JSON tool result content value (the ContentRaw field of ParsedToolResult). It handles both plain string and array-of-blocks formats.

func DecodeCursorProjectDir added in v0.9.0

func DecodeCursorProjectDir(dirName string) string

DecodeCursorProjectDir extracts a clean project name from a Cursor-style hyphenated directory name. Cursor encodes absolute paths by replacing / and . with hyphens, e.g. "Users-fiona-Documents-mcp-cursor-analytics".

Scans forward from the home-directory root to find the first marker, handling multi-token usernames (e.g. "Users-john-doe-Documents-project").

func ExtractClaudeProjectHints added in v0.4.0

func ExtractClaudeProjectHints(
	path string,
) (cwd, gitBranch string)

ExtractClaudeProjectHints reads project-identifying metadata from a Claude Code JSONL session file.

func ExtractCwdFromSession

func ExtractCwdFromSession(path string) string

ExtractCwdFromSession reads the first cwd field from a Claude Code JSONL session file.

func ExtractProjectFromCwd

func ExtractProjectFromCwd(cwd string) string

ExtractProjectFromCwd extracts a project name from a working directory path. If cwd is inside a git repository (including linked worktrees), this returns the repository root directory name. Otherwise it falls back to the last path component.

func ExtractProjectFromCwdWithBranch added in v0.4.0

func ExtractProjectFromCwdWithBranch(
	cwd, gitBranch string,
) string

ExtractProjectFromCwdWithBranch extracts a canonical project name from cwd and optionally git branch metadata. Branch is used as a fallback heuristic when the original worktree path no longer exists on disk.

func ExtractTextContent

func ExtractTextContent(
	content gjson.Result,
) (string, bool, bool, []ParsedToolCall, []ParsedToolResult)

ExtractTextContent extracts readable text from message content. content can be a string or a JSON array of blocks. Returns the text, hasThinking, hasToolUse, tool calls, and tool results.

func FindAmpSourceFile added in v0.10.0

func FindAmpSourceFile(threadsDir, threadID string) string

FindAmpSourceFile locates an Amp thread file by its raw thread ID (without the "amp:" prefix).

func FindClaudeSourceFile added in v0.10.0

func FindClaudeSourceFile(
	projectsDir, sessionID string,
) string

FindClaudeSourceFile finds the original JSONL file for a Claude session ID by searching all project directories.

func FindCodexSourceFile added in v0.10.0

func FindCodexSourceFile(sessionsDir, sessionID string) string

FindCodexSourceFile finds a Codex session file by UUID. Searches the year/month/day directory structure for files matching rollout-{timestamp}-{uuid}.jsonl.

func FindCopilotSourceFile added in v0.10.0

func FindCopilotSourceFile(
	copilotDir, rawID string,
) string

FindCopilotSourceFile locates a Copilot session file by UUID. Checks both bare (<uuid>.jsonl) and directory (<uuid>/events.jsonl) layouts.

func FindCursorSourceFile added in v0.10.0

func FindCursorSourceFile(
	projectsDir, sessionID string,
) string

FindCursorSourceFile finds a Cursor transcript file by session UUID. Prefers .jsonl over .txt.

func FindGeminiSourceFile added in v0.10.0

func FindGeminiSourceFile(
	geminiDir, sessionID string,
) string

FindGeminiSourceFile locates a Gemini session file by its session UUID. Searches all project hash directories.

func FindOpenClawSourceFile added in v0.10.0

func FindOpenClawSourceFile(agentsDir, rawID string) string

FindOpenClawSourceFile locates an OpenClaw session file by its raw ID (without the "openclaw:" prefix). The raw ID has the format "<agentId>:<sessionId>", which directly maps to the file at <agentsDir>/<agentId>/sessions/<sessionId>.jsonl.

If the active .jsonl file does not exist (archive-only session), the sessions directory is scanned for any archived file whose logical session ID matches. When multiple archived files match, the best candidate (newest by filename timestamp) is returned.

func FindVSCodeCopilotSourceFile added in v0.10.0

func FindVSCodeCopilotSourceFile(
	vscodeUserDir, rawID string,
) string

FindVSCodeCopilotSourceFile locates a VSCode Copilot session file by UUID (.jsonl preferred over .json).

func GeminiSessionID

func GeminiSessionID(data []byte) string

GeminiSessionID extracts the sessionId field from raw Gemini session JSON data without fully parsing.

func GetProjectName

func GetProjectName(dirName string) string

GetProjectName converts an encoded Claude project directory name to a clean project name. Claude encodes paths like /Users/alice/code/my-app as -Users-alice-code-my-app.

func InferRelationshipTypes added in v0.7.0

func InferRelationshipTypes(results []ParseResult)

InferRelationshipTypes sets RelationshipType on results that have a ParentSessionID but no explicit type. Sessions with an "agent-" prefix are subagents; others are continuations.

func IsAmpThreadFileName added in v0.10.0

func IsAmpThreadFileName(name string) bool

IsAmpThreadFileName reports whether name matches the Amp thread file pattern (T-*.json).

func IsCursorTranscriptExt added in v0.10.0

func IsCursorTranscriptExt(name string) bool

IsCursorTranscriptExt reports whether the filename has a recognized Cursor transcript extension (.txt or .jsonl).

func IsDigits added in v0.10.0

func IsDigits(s string) bool

IsDigits reports whether s is non-empty and contains only Unicode digit characters.

func IsOpenClawSessionFile added in v0.10.0

func IsOpenClawSessionFile(name string) bool

IsOpenClawSessionFile reports whether a filename is an OpenClaw session file. It matches active files (*.jsonl) and the known archive suffixes: .jsonl.deleted.<ts>, .jsonl.reset.<ts>, and .jsonl.full.bak.

func IsRegularFile added in v0.10.0

func IsRegularFile(path string) bool

IsRegularFile reports whether path exists and is a regular file (not a symlink, directory, or other special file).

func IsValidSessionID added in v0.10.0

func IsValidSessionID(id string) bool

IsValidSessionID reports whether id contains only alphanumeric characters, dashes, and underscores.

func NeedsProjectReparse

func NeedsProjectReparse(project string) bool

NeedsProjectReparse checks if a stored project name looks like an un-decoded encoded path that should be re-extracted.

func NormalizeName added in v0.6.0

func NormalizeName(s string) string

NormalizeName converts dashes to underscores for consistent project name formatting.

func NormalizeToolCategory

func NormalizeToolCategory(rawName string) string

NormalizeToolCategory maps a raw tool name to a normalized category. Categories: Read, Edit, Write, Bash, Grep, Glob, Task, Tool, Other.

func OpenClawSessionID added in v0.10.0

func OpenClawSessionID(name string) string

OpenClawSessionID extracts the session UUID from an OpenClaw session filename, stripping any archive suffix. Examples:

"abc.jsonl" → "abc"
"abc.jsonl.deleted.2026-02-19T08-59-24.951Z" → "abc"
"abc.jsonl.full.bak" → "abc"

func ParseAmpSession added in v0.10.0

func ParseAmpSession(
	path, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseAmpSession parses an Amp thread JSON file. Each thread is a single JSON document at ~/.local/share/amp/threads/T-*.json.

func ParseCodexSession

func ParseCodexSession(
	path, machine string, includeExec bool,
) (*ParsedSession, []ParsedMessage, error)

ParseCodexSession parses a Codex JSONL session file. Returns nil session if the session is non-interactive and includeExec is false.

func ParseCopilotSession added in v0.5.0

func ParseCopilotSession(
	path, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseCopilotSession parses a Copilot JSONL session file. Returns (nil, nil, nil) if the file doesn't exist or contains no user/assistant messages.

func ParseCursorSession added in v0.9.0

func ParseCursorSession(
	path, project, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseCursorSession parses a Cursor agent transcript file. Transcripts are plain text with "user:" and "assistant:" role markers, tool calls, and thinking blocks.

func ParseGeminiSession

func ParseGeminiSession(
	path, project, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseGeminiSession parses a Gemini CLI session JSON file. Unlike Claude/Codex JSONL, each Gemini file is a single JSON document containing all messages.

func ParseOpenClawSession added in v0.10.0

func ParseOpenClawSession(
	path, project, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseOpenClawSession parses an OpenClaw JSONL session file. OpenClaw stores messages in a JSONL format with a session header line, message entries, compaction summaries, and metadata events.

func ParseOpenCodeSession added in v0.5.0

func ParseOpenCodeSession(
	dbPath, sessionID, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseOpenCodeSession parses a single session by ID from the OpenCode database.

func ParseVSCodeCopilotSession added in v0.10.0

func ParseVSCodeCopilotSession(
	path, project, machine string,
) (*ParsedSession, []ParsedMessage, error)

ParseVSCodeCopilotSession parses a VSCode Copilot chat session file (.json or .jsonl). Returns (nil, nil, nil) if the file is empty or contains no meaningful content.

func ReadVSCodeWorkspaceManifest added in v0.10.0

func ReadVSCodeWorkspaceManifest(hashDir string) string

ReadVSCodeWorkspaceManifest reads the workspace.json file in a workspaceStorage hash directory and extracts the project folder path.

func ResolveGeminiProject added in v0.10.0

func ResolveGeminiProject(
	dirName string,
	projectMap map[string]string,
) string

ResolveGeminiProject maps a tmp/ subdirectory name to a project name using the project map.

Types

type AgentDef added in v0.10.0

// AgentDef describes a supported coding agent's filesystem layout,
// configuration keys, and session ID conventions.
type AgentDef struct {
	Type         AgentType // which agent this entry describes
	DisplayName  string   // "Claude Code", "Codex", etc.
	EnvVar       string   // env var for dir override
	ConfigKey    string   // JSON key in config.json ("" = none)
	DefaultDirs  []string // paths relative to $HOME
	IDPrefix     string   // session ID prefix ("" for Claude)
	WatchSubdirs []string // subdirs to watch (nil = watch root)
	FileBased    bool     // false for DB-backed agents

	// DiscoverFunc finds session files under a root directory.
	// Nil for non-file-based agents.
	DiscoverFunc func(string) []DiscoveredFile

	// FindSourceFunc locates a single session's source file
	// given a root directory and the raw session ID (prefix
	// already stripped). Nil for non-file-based agents.
	FindSourceFunc func(string, string) string
}

AgentDef describes a supported coding agent's filesystem layout, configuration keys, and session ID conventions.

func AgentByPrefix added in v0.10.0

func AgentByPrefix(sessionID string) (AgentDef, bool)

AgentByPrefix returns the AgentDef whose IDPrefix matches the session ID. For Claude (empty prefix), the match succeeds only when no other prefix matches and the ID does not contain a colon.

func AgentByType added in v0.10.0

func AgentByType(t AgentType) (AgentDef, bool)

AgentByType returns the AgentDef for the given type.

type AgentType

// AgentType identifies the AI agent that produced a session.
type AgentType string

AgentType identifies the AI agent that produced a session.

// Supported agent types. Each value is used as the Type of exactly one
// entry in Registry.
const (
	AgentClaude        AgentType = "claude"
	AgentCodex         AgentType = "codex"
	AgentCopilot       AgentType = "copilot"
	AgentGemini        AgentType = "gemini"
	AgentOpenCode      AgentType = "opencode"
	AgentCursor        AgentType = "cursor"
	AgentAmp           AgentType = "amp"
	AgentVSCodeCopilot AgentType = "vscode-copilot"
	AgentOpenClaw      AgentType = "openclaw"
)

type DiscoveredFile added in v0.10.0

// DiscoveredFile holds a discovered session file. It is the element type
// returned by the Discover* functions.
type DiscoveredFile struct {
	Path    string    // location of the session file
	Project string    // pre-extracted project name
	Agent   AgentType // which agent this file belongs to
}

DiscoveredFile holds a discovered session file.

func DiscoverAmpSessions added in v0.10.0

func DiscoverAmpSessions(threadsDir string) []DiscoveredFile

DiscoverAmpSessions finds all thread JSON files under the Amp threads directory (~/.local/share/amp/threads/T-*.json).

func DiscoverClaudeProjects added in v0.10.0

func DiscoverClaudeProjects(projectsDir string) []DiscoveredFile

DiscoverClaudeProjects finds all project directories under the Claude projects dir and returns their JSONL session files.

func DiscoverCodexSessions added in v0.10.0

func DiscoverCodexSessions(sessionsDir string) []DiscoveredFile

DiscoverCodexSessions finds all JSONL files under the Codex sessions dir (year/month/day structure).

func DiscoverCopilotSessions added in v0.10.0

func DiscoverCopilotSessions(
	copilotDir string,
) []DiscoveredFile

DiscoverCopilotSessions finds all JSONL files under <copilotDir>/session-state/. Supports both bare format (<uuid>.jsonl) and directory format (<uuid>/events.jsonl).

func DiscoverCursorSessions added in v0.10.0

func DiscoverCursorSessions(
	projectsDir string,
) []DiscoveredFile

DiscoverCursorSessions finds all agent transcript files under the Cursor projects dir (<projectsDir>/<project>/agent-transcripts/<uuid>.txt). All discovered paths are validated to resolve within the canonical projectsDir, preventing symlink escapes.

func DiscoverGeminiSessions added in v0.10.0

func DiscoverGeminiSessions(
	geminiDir string,
) []DiscoveredFile

DiscoverGeminiSessions finds all session JSON files under the Gemini directory (~/.gemini/tmp/*/chats/session-*.json).

func DiscoverOpenClawSessions added in v0.10.0

func DiscoverOpenClawSessions(agentsDir string) []DiscoveredFile

DiscoverOpenClawSessions finds all JSONL session files under the OpenClaw agents directory. The directory structure is: <agentsDir>/<agentId>/sessions/<sessionId>.jsonl

When both active (.jsonl) and archived (.jsonl.deleted.*, .jsonl.full.bak, .jsonl.reset.*) files exist for the same logical session ID, only one file is returned per session: the active .jsonl file is preferred; if absent, the newest archived file (by filename, which embeds a timestamp, or by file mtime as a fallback) is chosen.

func DiscoverVSCodeCopilotSessions added in v0.10.0

func DiscoverVSCodeCopilotSessions(
	vscodeUserDir string,
) []DiscoveredFile

DiscoverVSCodeCopilotSessions traverses the VSCode workspaceStorage directory to find chatSessions/*.json and *.jsonl files. When both formats exist for the same session UUID, the .jsonl file takes priority. It also checks globalStorage/emptyWindowChatSessions. The vscodeUserDir should point to e.g.

~/Library/Application Support/Code/User (macOS)
~/.config/Code/User (Linux)

type FileInfo

// FileInfo holds file system metadata for a session source file.
type FileInfo struct {
	Path string
	Size int64
	// NOTE(review): the time unit of Mtime (seconds vs. nanoseconds) is
	// not visible here; confirm against the code that populates it.
	Mtime int64
	// NOTE(review): the hash algorithm and encoding are not visible here.
	Hash string
}

FileInfo holds file system metadata for a session source file.

type OpenCodeSession added in v0.5.0

// OpenCodeSession bundles a parsed session with its messages. It is the
// element type returned by ParseOpenCodeDB.
type OpenCodeSession struct {
	Session  ParsedSession
	Messages []ParsedMessage
}

OpenCodeSession bundles a parsed session with its messages.

func ParseOpenCodeDB added in v0.5.0

func ParseOpenCodeDB(
	dbPath, machine string,
) ([]OpenCodeSession, error)

ParseOpenCodeDB opens the OpenCode SQLite database read-only and returns all sessions with messages.

type OpenCodeSessionMeta added in v0.5.0

// OpenCodeSessionMeta is lightweight metadata for a session, used to
// detect changes without parsing messages or parts. It is the element
// type returned by ListOpenCodeSessionMeta.
type OpenCodeSessionMeta struct {
	SessionID string
	// NOTE(review): presumably a synthetic path standing in for a file
	// path, since OpenCode sessions live in a database; confirm.
	VirtualPath string
	// NOTE(review): presumably a modification timestamp used for change
	// detection; the unit is not visible here.
	FileMtime int64
}

OpenCodeSessionMeta is lightweight metadata for a session, used to detect changes without parsing messages or parts.

func ListOpenCodeSessionMeta added in v0.5.0

func ListOpenCodeSessionMeta(
	dbPath string,
) ([]OpenCodeSessionMeta, error)

ListOpenCodeSessionMeta returns lightweight metadata for all sessions without parsing messages or parts. Used by the sync engine to detect which sessions have changed.

type ParseResult added in v0.7.0

// ParseResult pairs a parsed session with its messages. ParseClaudeSession
// returns one or more of these per file, and InferRelationshipTypes
// operates on a slice of them.
type ParseResult struct {
	Session  ParsedSession
	Messages []ParsedMessage
}

ParseResult pairs a parsed session with its messages.

func ParseClaudeSession

func ParseClaudeSession(
	path, project, machine string,
) ([]ParseResult, error)

ParseClaudeSession parses a Claude Code JSONL session file. Returns one or more ParseResult structs (multiple when forks are detected in the uuid/parentUuid DAG).

type ParsedMessage

// ParsedMessage holds a single extracted message.
type ParsedMessage struct {
	// NOTE(review): presumably the message's position within its
	// session; confirm whether it is zero- or one-based.
	Ordinal   int
	Role      RoleType
	Content   string
	Timestamp time.Time
	// HasThinking and HasToolUse mirror the flags that ExtractTextContent
	// reports for the message content.
	HasThinking bool
	HasToolUse  bool
	// NOTE(review): presumably the length of Content; bytes vs. runes is
	// not visible here.
	ContentLength int
	ToolCalls     []ParsedToolCall
	ToolResults   []ParsedToolResult
}

ParsedMessage holds a single extracted message.

type ParsedSession

// ParsedSession holds session metadata produced by the Parse* functions.
type ParsedSession struct {
	ID      string
	Project string
	Machine string
	Agent   AgentType
	// ParentSessionID and RelationshipType link this session to a parent.
	// InferRelationshipTypes fills in RelationshipType when a parent ID
	// is present but no explicit type was set.
	ParentSessionID  string
	RelationshipType RelationshipType
	FirstMessage     string
	StartedAt        time.Time
	EndedAt          time.Time
	MessageCount     int
	UserMessageCount int
	// File is the file system metadata of the session's source file.
	File FileInfo
}

ParsedSession holds session metadata extracted from a session source: a JSONL file, a single JSON document (e.g. Gemini, Amp), or the OpenCode database.

type ParsedToolCall

// ParsedToolCall holds a single tool invocation extracted from a message.
type ParsedToolCall struct {
	ToolUseID         string // tool_use block id from session data
	ToolName          string // raw name from session data
	Category          string // normalized: Read, Edit, Write, Bash, etc.
	InputJSON         string // raw JSON of the input object
	SkillName         string // skill name when ToolName is "Skill"
	SubagentSessionID string // linked subagent session file (e.g. "agent-{task_id}")
}

ParsedToolCall holds a single tool invocation extracted from a message.

type ParsedToolResult added in v0.4.0

// ParsedToolResult holds metadata about a tool result block in a user
// message (the response to a prior tool_use).
type ParsedToolResult struct {
	// NOTE(review): presumably matches ParsedToolCall.ToolUseID of the
	// call being answered; confirm.
	ToolUseID     string
	ContentLength int
	ContentRaw    string // raw JSON of the content field; decode with DecodeContent
}

ParsedToolResult holds metadata about a tool result block in a user message (the response to a prior tool_use).

type RelationshipType added in v0.7.0

// RelationshipType describes how a session relates to its parent.
type RelationshipType string

RelationshipType describes how a session relates to its parent.

// Relationship kinds. RelNone is the zero value (no explicit type).
// InferRelationshipTypes assigns subagent to sessions with an "agent-"
// prefix and continuation to other sessions that have a parent.
// NOTE(review): RelFork is presumably set for sessions split off by the
// Claude fork detection; confirm against ParseClaudeSession.
const (
	RelNone         RelationshipType = ""
	RelContinuation RelationshipType = "continuation"
	RelSubagent     RelationshipType = "subagent"
	RelFork         RelationshipType = "fork"
)

type RoleType

// RoleType identifies the role of a message sender.
type RoleType string

RoleType identifies the role of a message sender.

// Message sender roles used in ParsedMessage.Role.
const (
	RoleUser      RoleType = "user"
	RoleAssistant RoleType = "assistant"
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL