Documentation
¶
Index ¶
- type CatalogEntry
- type ChangeEntry
- type CheckFile
- type CheckResult
- type Engine
- func (e *Engine) Catalog(ctx context.Context) ([]CatalogEntry, error)
- func (e *Engine) Check(ctx context.Context, domain string) (*CheckResult, error)
- func (e *Engine) Close() error
- func (e *Engine) Diff(ctx context.Context, from, to string) (string, error)
- func (e *Engine) Discover(ctx context.Context, rawURL string) (*discovery.Result, error)
- func (e *Engine) History(ctx context.Context, site string, limit int) ([]ChangeEntry, error)
- func (e *Engine) Init(ctx context.Context, rawURL string) (*SiteInfo, error)
- func (e *Engine) List(ctx context.Context) ([]SiteInfo, error)
- func (e *Engine) ListFiles(ctx context.Context, domain string) ([]FileEntry, error)
- func (e *Engine) Outline(ctx context.Context, domain, path string, maxDepth, maxSections int) (*OutlineResult, error)
- func (e *Engine) ReadSection(ctx context.Context, domain, path, section string, maxLines int) (string, error)
- func (e *Engine) RebuildIndex(ctx context.Context) error
- func (e *Engine) Refresh(ctx context.Context, domain string) (*SyncResult, error)
- func (e *Engine) Remove(ctx context.Context, domain string, keepFiles bool) error
- func (e *Engine) Search(ctx context.Context, query string, site, contentType, category, path string, limit, offset int) (*SearchResult, error)
- func (e *Engine) SearchFull(ctx context.Context, query string, site, contentType, category string) (*SearchFullResult, error)
- func (e *Engine) Stale(ctx context.Context, threshold time.Duration) ([]SiteStats, error)
- func (e *Engine) Stats(ctx context.Context) (*Stats, error)
- func (e *Engine) Status(ctx context.Context, domain string) (*SiteInfo, error)
- func (e *Engine) Summarize(ctx context.Context, domain, path, summary string) error
- func (e *Engine) Sync(ctx context.Context, domain string) (*SyncResult, error)
- func (e *Engine) SyncAll(ctx context.Context) ([]SyncResult, error)
- func (e *Engine) SyncWithContentTypes(ctx context.Context, domain, contentTypes string) (*SyncResult, error)
- func (e *Engine) Tag(ctx context.Context, domain, path, category string) error
- type FileEntry
- type Option
- func WithCategorizer(c store.Categorizer) Option
- func WithConfig(c *config.Config) Option
- func WithDiscovery(d discovery.ContentDiscoverer) Option
- func WithEvents(e events.EventEmitter) Option
- func WithFetcher(f fetcher.HTTPFetcher) Option
- func WithGit(g store.VersionStore) Option
- func WithIndexer(i store.Indexer) Option
- func WithProcessors(p ...content.Processor) Option
- func WithRespectRobots() Option
- func WithSyncer(s mirror.Syncer) Option
- type OutlineResult
- type SearchFullResult
- type SearchHit
- type SearchResult
- type SiteInfo
- type SiteStats
- type Stats
- type SyncResult
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CatalogEntry ¶
// CatalogEntry is the JSON-serializable per-site record returned by
// Engine.Catalog. Fields tagged omitempty are left out of the encoded JSON
// when they are empty; the remaining fields are always emitted.
type CatalogEntry struct {
// Domain and URL identify the site and are always present in the JSON.
Domain string `json:"domain"`
URL string `json:"url"`
Title string `json:"title,omitempty"` // H1 from llms.txt
Description string `json:"description,omitempty"` // Blockquote from llms.txt
Topics []string `json:"topics,omitempty"` // H2/H3 sections from llms.txt
Categories map[string]int `json:"categories,omitempty"` // category → file count
// FileCount has no omitempty, so a count of zero is still emitted.
FileCount int `json:"file_count"`
}
CatalogEntry is a compact summary of a tracked site's LLM content, derived from its llms.txt structure and category index.
type CheckFile ¶
// CheckFile is the JSON-serializable description of one file that is
// available for syncing. All three fields are always emitted.
type CheckFile struct {
// Path is encoded under the "path" key.
// NOTE(review): presumably relative to the site root — confirm.
Path string `json:"path"`
// Size is encoded under the "size" key.
// NOTE(review): the unit is not stated here (likely bytes — confirm). This
// field is an int, whereas FileEntry.Size is an int64.
Size int `json:"size"`
// ContentType is encoded under the "content_type" key.
ContentType string `json:"content_type"`
}
CheckFile describes a file available for syncing.
type CheckResult ¶
CheckResult reports what would change without downloading.
type Engine ¶
// Engine is the orchestrator type on which the package's operations (Sync,
// Search, Catalog, ...) are defined as methods. Its collaborators are exported
// fields; the With* options listed in this package accept values of the same
// types as the corresponding fields.
type Engine struct {
// Config and Store are pointer-typed, unlike the interface-typed
// collaborators below. WithConfig accepts a *config.Config.
Config *config.Config
Store *store.Store
// Git is the version store; WithGit accepts the same type.
Git store.VersionStore
// Index is the search indexer; WithIndexer accepts the same type.
Index store.Indexer
// Discovery is the content discoverer; WithDiscovery accepts the same type.
Discovery discovery.ContentDiscoverer
// Mirror is the content syncer; WithSyncer accepts the same type.
Mirror mirror.Syncer
// Fetcher is the HTTP fetcher; WithFetcher accepts the same type.
Fetcher fetcher.HTTPFetcher
// Events is the event emitter; WithEvents accepts the same type.
Events events.EventEmitter
// Processors holds the content processors; WithProcessors accepts a
// variadic list of the same element type.
Processors []content.Processor
// Categorizer is the page categorizer; WithCategorizer accepts the same type.
Categorizer store.Categorizer
// RootDir is a plain string.
// NOTE(review): presumably the directory the engine is rooted at, as
// described for New — confirm.
RootDir string
}
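Engine is the core orchestrator that ties all subsystems together. The pluggable component fields (Git, Index, Discovery, Mirror, Fetcher, Events, Categorizer) are interfaces, allowing alternative implementations to be injected via functional options in New(); Config and Store are concrete pointer types.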
func New ¶
New creates an Engine rooted at the given directory. Components not provided via options are constructed with defaults.
func (*Engine) Catalog ¶
func (e *Engine) Catalog(ctx context.Context) ([]CatalogEntry, error)
Catalog returns a compact summary of all tracked sites, extracting title, description, and topic structure from each site's llms.txt.
func (*Engine) History ¶
History returns recent change entries from git, optionally filtered to a site.
func (*Engine) Init ¶
Init adds a new site to track. It probes for content but does not download yet.
func (*Engine) Outline ¶
func (e *Engine) Outline(ctx context.Context, domain, path string, maxDepth, maxSections int) (*OutlineResult, error)
Outline parses a mirrored file and returns its heading structure. maxDepth limits the heading levels included (0 = no limit; 3 is recommended). maxSections caps the number of entries returned (0 = no limit; 100 is recommended).
func (*Engine) ReadSection ¶
func (e *Engine) ReadSection(ctx context.Context, domain, path, section string, maxLines int) (string, error)
ReadSection reads a specific section of a file by heading match, or the leading lines of the file. If section is non-empty, it delegates to the content processor. If section is empty and maxLines > 0, it returns the first maxLines lines.
func (*Engine) RebuildIndex ¶
RebuildIndex rebuilds the search index from files on disk.
func (*Engine) Refresh ¶
Refresh re-syncs a tracked site. If a content-type filter was persisted for the site by an earlier SyncWithContentTypes call, Refresh honours that filter so it doesn't pull content the agent intentionally excluded. Uses cached ETags for conditional fetching.
func (*Engine) Search ¶
func (e *Engine) Search(ctx context.Context, query string, site, contentType, category, path string, limit, offset int) (*SearchResult, error)
Search performs a full-text search across indexed content.
func (*Engine) SearchFull ¶
func (e *Engine) SearchFull(ctx context.Context, query string, site, contentType, category string) (*SearchFullResult, error)
SearchFull searches and returns the full content of the top hit.
func (*Engine) Status ¶
Status returns info about a tracked site including category breakdown and age.
func (*Engine) SyncAll ¶
func (e *Engine) SyncAll(ctx context.Context) ([]SyncResult, error)
SyncAll syncs all tracked sites.
func (*Engine) SyncWithContentTypes ¶
func (e *Engine) SyncWithContentTypes(ctx context.Context, domain, contentTypes string) (*SyncResult, error)
SyncWithContentTypes syncs only files matching the given content types (comma-separated, e.g. "llms-txt,llms-full-txt"). This lets agents skip companion pages when they only need the index files. The filter is persisted in the site config so that Refresh honours it.
type FileEntry ¶
// FileEntry is the JSON-serializable record for one file in a site's mirror;
// Engine.ListFiles returns a slice of these. None of the fields use
// omitempty, so all four keys are always emitted.
type FileEntry struct {
// Path is encoded under the "path" key.
Path string `json:"path"`
// Size is encoded under the "size" key as a 64-bit integer.
// NOTE(review): presumably a byte count — confirm.
Size int64 `json:"size"`
// ContentType is encoded under the "content_type" key.
ContentType string `json:"content_type"`
// Category is encoded under the "category" key, including when it is empty.
Category string `json:"category"`
}
FileEntry describes a file in a site's mirror.
type Option ¶
// Option is a functional option: a function that receives a pointer to the
// package's unexported engineOpts value. The With* functions in this package
// return Option values.
type Option func(*engineOpts)
Option configures engine behavior via functional options.
func WithCategorizer ¶
func WithCategorizer(c store.Categorizer) Option
WithCategorizer injects a custom page categorizer (e.g. LLM-based).
func WithConfig ¶
WithConfig injects a pre-loaded configuration, skipping file-based loading.
func WithDiscovery ¶
func WithDiscovery(d discovery.ContentDiscoverer) Option
WithDiscovery injects a custom content discoverer.
func WithEvents ¶
func WithEvents(e events.EventEmitter) Option
WithEvents injects a custom event emitter for observability.
func WithFetcher ¶
func WithFetcher(f fetcher.HTTPFetcher) Option
WithFetcher injects a custom HTTP fetcher (e.g. Playwright-capable).
func WithGit ¶
func WithGit(g store.VersionStore) Option
WithGit injects a custom version store for change tracking.
func WithIndexer ¶
WithIndexer injects a custom search indexer (e.g. Bleve, vector search).
func WithProcessors ¶
WithProcessors sets the content processors (e.g. markdown, reST, chunker).
func WithRespectRobots ¶
func WithRespectRobots() Option
WithRespectRobots enables robots.txt checking during discovery.
func WithSyncer ¶
WithSyncer injects a custom content syncer (e.g. with cleaning/chunking pipeline).
type OutlineResult ¶
// OutlineResult is the JSON-serializable value returned by Engine.Outline.
// Summary, Hint, and Truncated are dropped from the JSON when empty/false.
type OutlineResult struct {
// Domain and Path are always emitted.
// NOTE(review): presumably they echo the domain and path passed to
// Outline — confirm.
Domain string `json:"domain"`
Path string `json:"path"`
// TotalSize is encoded under the "total_size" key.
// NOTE(review): the unit is not stated here (likely bytes — confirm).
TotalSize int `json:"total_size"`
Summary string `json:"summary,omitempty"`
Hint string `json:"hint,omitempty"`
// Sections has no omitempty, so a nil slice is encoded as JSON null
// rather than being left out.
Sections []content.Section `json:"sections"`
// NOTE(review): presumably set when maxDepth/maxSections cut the list
// short — confirm.
Truncated bool `json:"truncated,omitempty"`
}
OutlineResult is the table of contents for a file.
type SearchFullResult ¶
// SearchFullResult is the JSON-serializable value returned by
// Engine.SearchFull. Every field is tagged omitempty, so a zero-value result
// encodes as an empty JSON object.
type SearchFullResult struct {
Domain string `json:"domain,omitempty"`
Path string `json:"path,omitempty"`
ContentType string `json:"content_type,omitempty"`
Category string `json:"category,omitempty"`
// NOTE(review): the unit is not stated here (likely bytes — confirm).
Size int `json:"size,omitempty"`
Content string `json:"content,omitempty"`
// NOTE(review): presumably populated to guide the caller when there is no
// usable match — confirm.
Suggestion string `json:"suggestion,omitempty"`
}
SearchFullResult contains the full content of the best search match.
type SearchResult ¶
// SearchResult is the JSON-serializable value returned by Engine.Search.
type SearchResult struct {
// Hits is encoded under the "results" key (not "hits"). It has no
// omitempty, so a nil slice is encoded as JSON null.
Hits []SearchHit `json:"results"`
// TotalCount is always emitted, even when zero.
TotalCount int `json:"total_count"`
// Offset, Limit, HasMore, and Suggestion are omitted from the JSON when
// they hold their zero value; in particular an Offset of 0 is not emitted.
Offset int `json:"offset,omitempty"`
Limit int `json:"limit,omitempty"`
HasMore bool `json:"has_more,omitempty"`
Suggestion string `json:"suggestion,omitempty"`
}
SearchResult wraps search hits with metadata.
type SiteInfo ¶
// SiteInfo is the JSON-serializable site description returned by Engine.Init,
// Engine.List, and Engine.Status.
type SiteInfo struct {
// Domain, URL, LastSync, and FileCount are always emitted.
Domain string `json:"domain"`
URL string `json:"url"`
LastSync time.Time `json:"last_sync"`
FileCount int `json:"file_count"`
// NOTE(review): presumably the comma-separated content-type filter
// described for SyncWithContentTypes — confirm.
ContentTypes string `json:"content_types,omitempty"`
// NOTE(review): presumably a human-readable time since LastSync, as on
// SiteStats.Age — confirm.
Age string `json:"age,omitempty"`
// CategoryCounts is encoded under the "categories" key and omitted when empty.
CategoryCounts map[string]int `json:"categories,omitempty"`
}
SiteInfo is returned when listing or describing a site.
type SiteStats ¶
// SiteStats is the JSON-serializable per-site statistics record. It is the
// element type returned by Engine.Stale and of Stats.SiteStats.
type SiteStats struct {
Domain string `json:"domain"`
URL string `json:"url"`
FileCount int `json:"file_count"`
// Size is encoded under "size_bytes"; SizeHuman takes the plain "size" key.
Size int64 `json:"size_bytes"`
SizeHuman string `json:"size"`
LastSync time.Time `json:"last_sync"`
Age string `json:"age,omitempty"` // human-readable time since last sync
}
SiteStats holds per-site statistics.
type Stats ¶
// Stats is the JSON-serializable aggregate returned by Engine.Stats. None of
// the fields use omitempty, so every key is always emitted.
type Stats struct {
TotalSites int `json:"total_sites"`
TotalFiles int `json:"total_files"`
// TotalSize is encoded under "total_size_bytes"; TotalSizeHuman takes the
// plain "total_size" key.
TotalSize int64 `json:"total_size_bytes"`
TotalSizeHuman string `json:"total_size"`
OldestSync time.Time `json:"oldest_sync"`
NewestSync time.Time `json:"newest_sync"`
// SiteStats is encoded under the "sites" key; a nil slice encodes as JSON null.
SiteStats []SiteStats `json:"sites"`
}
Stats holds aggregate information about the workspace.
type SyncResult ¶
// SyncResult is the value returned by Engine.Sync, Engine.Refresh,
// Engine.SyncWithContentTypes, and (as a slice) Engine.SyncAll. It shares its
// name with the embedded mirror.SyncResult but is a distinct type.
type SyncResult struct {
// mirror.SyncResult is embedded without a JSON tag.
// NOTE(review): assuming it is a struct, encoding/json flattens its
// exported fields into this object alongside the two keys below — confirm.
mirror.SyncResult
// SyncTime is encoded under the "sync_time" key.
SyncTime time.Time `json:"sync_time"`
// Committed is encoded under the "committed" key, including when false.
// NOTE(review): presumably reports a commit to the version store — confirm.
Committed bool `json:"committed"`
}
SyncResult wraps the mirror result with the time of the sync and a flag reporting whether the result was committed.