Documentation ¶
Overview ¶
Package search provides URL scraping, issue search, and research agent types.
Public types: URLScraper, ScrapeResult, Issue, SimilarIssue, IssueIndex, ResearchAgent, ResearchQuery, ResearchResult, ResearchFinding.
Public functions: NewURLScraper, NewIssueIndex, NewResearchAgent, ExtractHTML, ExtractJSON, ExtractMarkdown, ExtractCode, SuggestResolution, FormatIssueResults, BuildSearchContext.
Index ¶
- func BuildSearchContext(similar []*SimilarIssue) string
- func ExtractCode(body string, rawURL string) string
- func ExtractHTML(body string) (title, content string)
- func ExtractJSON(body string) string
- func ExtractMarkdown(body string) string
- func FormatIssueResults(similar []*SimilarIssue) string
- func SuggestResolution(similar []*SimilarIssue) string
- type Issue
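- type IssueIndex
- func NewIssueIndex() *IssueIndex
- func (idx *IssueIndex) AddIssue(issue *Issue)
- func (idx *IssueIndex) FindSimilar(query string, limit int) []*SimilarIssue
- func (idx *IssueIndex) ImportFromCommits(projectDir string) error
- func (idx *IssueIndex) ImportFromGitHub(projectDir string) error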
- type ResearchAgent
- func NewResearchAgent(maxWorkers int) *ResearchAgent
- func (ra *ResearchAgent) DecomposeQuestion(question string) []string
- func (ra *ResearchAgent) FormatResult(result *ResearchResult) string
- func (ra *ResearchAgent) ParallelSearch(ctx context.Context, queries []string, searchFn func(string) (string, error)) []ResearchFinding
- func (ra *ResearchAgent) RankFindings(findings []ResearchFinding, query string) []ResearchFinding
- func (ra *ResearchAgent) Research(ctx context.Context, query ResearchQuery, searchFn func(string) (string, error)) (*ResearchResult, error)
- func (ra *ResearchAgent) Synthesize(findings []ResearchFinding, query string) string
- type ResearchFinding
- type ResearchQuery
- type ResearchResult
- type ScrapeResult
- type SimilarIssue
- type URLScraper
- func NewURLScraper() *URLScraper
- func (s *URLScraper) CacheGet(rawURL string) *ScrapeResult
- func (s *URLScraper) CacheSet(rawURL string, result *ScrapeResult)
- func (s *URLScraper) DetectURLs(text string) []string
- func (s *URLScraper) Fetch(ctx context.Context, rawURL string) (*ScrapeResult, error)
- func (s *URLScraper) FormatForContext(result *ScrapeResult, maxTokens int) string
- func (s *URLScraper) ShouldAutoFetch(rawURL string) bool
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func BuildSearchContext ¶
func BuildSearchContext(similar []*SimilarIssue) string
BuildSearchContext formats similar issues as context for agent injection. This is suitable for including in LLM prompts to provide relevant historical context.
func ExtractCode ¶
func ExtractCode(body string, rawURL string) string
ExtractCode wraps raw code content in a fenced code block with language detection.
func ExtractHTML ¶
func ExtractHTML(body string) (title, content string)
ExtractHTML strips tags and extracts readable text from HTML.
func ExtractJSON ¶
func ExtractJSON(body string) string
ExtractJSON pretty-prints JSON and truncates arrays to 5 elements.
func ExtractMarkdown ¶
func ExtractMarkdown(body string) string
ExtractMarkdown returns the markdown body, truncated if too long.
func FormatIssueResults ¶
func FormatIssueResults(similar []*SimilarIssue) string
FormatIssueResults produces a human-readable summary of similar issue search results.
func SuggestResolution ¶
func SuggestResolution(similar []*SimilarIssue) string
SuggestResolution generates a resolution suggestion based on similar closed issues.
Types ¶
type Issue ¶
type Issue struct {
ID string `json:"id"`
Title string `json:"title"`
Body string `json:"body"`
Labels []string `json:"labels"`
State string `json:"state"` // "open", "closed"
Resolution string `json:"resolution"` // how the issue was resolved
CreatedAt time.Time `json:"created_at"`
ClosedAt *time.Time `json:"closed_at"`
Tokens []string `json:"tokens"` // tokenized title+body for search
}
Issue represents a project issue (bug, feature request, etc.) for similarity search.
type IssueIndex ¶
type IssueIndex struct {
Issues []*Issue `json:"issues"`
InvertedIndex map[string][]int `json:"inverted_index"`
// contains filtered or unexported fields
}
IssueIndex provides BM25-based similarity search over a collection of issues.
func NewIssueIndex ¶
func NewIssueIndex() *IssueIndex
NewIssueIndex creates a new empty IssueIndex ready for use.
func (*IssueIndex) AddIssue ¶
func (idx *IssueIndex) AddIssue(issue *Issue)
AddIssue adds an issue to the index, tokenizing its title and body for search.
func (*IssueIndex) FindSimilar ¶
func (idx *IssueIndex) FindSimilar(query string, limit int) []*SimilarIssue
FindSimilar searches for issues similar to the given query using BM25 scoring. It returns up to limit results sorted by relevance score.
func (*IssueIndex) ImportFromCommits ¶
func (idx *IssueIndex) ImportFromCommits(projectDir string) error
ImportFromCommits extracts issue resolution info from git commit history. It parses "fixes #N" style references and maps them to commit messages.
func (*IssueIndex) ImportFromGitHub ¶
func (idx *IssueIndex) ImportFromGitHub(projectDir string) error
ImportFromGitHub imports issues from GitHub using the gh CLI tool. It requires the gh CLI to be installed and authenticated.
type ResearchAgent ¶
type ResearchAgent struct {
MaxWorkers int
Timeout time.Duration
Results []ResearchResult
// contains filtered or unexported fields
}
ResearchAgent gathers information from multiple sources in parallel, inspired by gpt-researcher's parallel crawler pattern.
func NewResearchAgent ¶
func NewResearchAgent(maxWorkers int) *ResearchAgent
NewResearchAgent creates a ResearchAgent with the given worker pool size. If maxWorkers <= 0, it defaults to 5.
func (*ResearchAgent) DecomposeQuestion ¶
func (ra *ResearchAgent) DecomposeQuestion(question string) []string
DecomposeQuestion breaks a complex question into searchable sub-queries.
func (*ResearchAgent) FormatResult ¶
func (ra *ResearchAgent) FormatResult(result *ResearchResult) string
FormatResult produces a human-readable formatted output of a ResearchResult.
func (*ResearchAgent) ParallelSearch ¶
func (ra *ResearchAgent) ParallelSearch(ctx context.Context, queries []string, searchFn func(string) (string, error)) []ResearchFinding
ParallelSearch runs searches concurrently using a worker pool and collects results.
func (*ResearchAgent) RankFindings ¶
func (ra *ResearchAgent) RankFindings(findings []ResearchFinding, query string) []ResearchFinding
RankFindings scores findings by relevance to the original query and returns them sorted.
func (*ResearchAgent) Research ¶
func (ra *ResearchAgent) Research(ctx context.Context, query ResearchQuery, searchFn func(string) (string, error)) (*ResearchResult, error)
Research executes a full research cycle: decompose, search in parallel, rank, and synthesize.
func (*ResearchAgent) Synthesize ¶
func (ra *ResearchAgent) Synthesize(findings []ResearchFinding, query string) string
Synthesize combines findings into a coherent summary, deduplicating overlapping information.
type ResearchFinding ¶
ResearchFinding represents a single piece of discovered information.
type ResearchQuery ¶
ResearchQuery defines a research task with a main question and optional sub-questions.
type ResearchResult ¶
type ResearchResult struct {
Query string
Findings []ResearchFinding
Sources []string
Duration time.Duration
TotalTokens int
}
ResearchResult holds the aggregated output of a research operation.
type ScrapeResult ¶
type ScrapeResult struct {
URL string
Title string
Content string
ContentType string // "html", "json", "text", "code", "markdown"
StatusCode int
FetchedAt time.Time
TokenEstimate int
}
ScrapeResult holds the extracted content from a fetched URL.
type SimilarIssue ¶
type SimilarIssue struct {
Issue *Issue `json:"issue"`
Score float64 `json:"score"`
MatchingTerms []string `json:"matching_terms"`
}
SimilarIssue represents a search result with similarity scoring.
type URLScraper ¶
type URLScraper struct {
Enabled bool
MaxSize int64
Timeout time.Duration
UserAgent string
Cache map[string]*ScrapeResult
// contains filtered or unexported fields
}
URLScraper detects URLs in conversation text and fetches/extracts their content.
func NewURLScraper ¶
func NewURLScraper() *URLScraper
NewURLScraper creates a URLScraper with default settings.
func (*URLScraper) CacheGet ¶
func (s *URLScraper) CacheGet(rawURL string) *ScrapeResult
CacheGet retrieves a cached ScrapeResult for the given URL.
func (*URLScraper) CacheSet ¶
func (s *URLScraper) CacheSet(rawURL string, result *ScrapeResult)
CacheSet stores a ScrapeResult in the cache.
func (*URLScraper) DetectURLs ¶
func (s *URLScraper) DetectURLs(text string) []string
DetectURLs finds all URLs in text, deduplicates them, and filters out URLs that point to binary content.
func (*URLScraper) Fetch ¶
func (s *URLScraper) Fetch(ctx context.Context, rawURL string) (*ScrapeResult, error)
Fetch retrieves the URL content, respecting timeout and size limits.
func (*URLScraper) FormatForContext ¶
func (s *URLScraper) FormatForContext(result *ScrapeResult, maxTokens int) string
FormatForContext formats a ScrapeResult for injection into agent context.
func (*URLScraper) ShouldAutoFetch ¶
func (s *URLScraper) ShouldAutoFetch(rawURL string) bool
ShouldAutoFetch determines whether a URL should be automatically fetched.