Documentation
¶
Index ¶
- func CreateMigrationGuide() string
- func EnsureDefaultStrategyConfig(config *ChunkerConfig)
- func IsLegacyConfig(config any) bool
- func ValidateAndFillDefaults(config *StrategyConfig) error
- func ValidateConfig(config *ChunkerConfig) error
- type AdvancedTableProcessor
- type Chunk
- type ChunkPool
- type ChunkPosition
- type ChunkerConfig
- type ChunkerError
- type ChunkerPool
- type ChunkingContext
- type ChunkingRule
- type ChunkingStrategy
- type CodeComplexityExtractor
- type ConcurrentChunker
- func (cc *ConcurrentChunker) ChunkDocument(content []byte) ([]Chunk, error)
- func (cc *ConcurrentChunker) ChunkDocumentBatch(contents [][]byte, maxConcurrency int) ([][]Chunk, []error)
- func (cc *ConcurrentChunker) ChunkDocumentConcurrent(contents [][]byte) ([][]Chunk, []error)
- func (cc *ConcurrentChunker) ClearErrors()
- func (cc *ConcurrentChunker) GetErrors() []*ChunkerError
- func (cc *ConcurrentChunker) GetPerformanceStats() PerformanceStats
- func (cc *ConcurrentChunker) ProcessDocumentsConcurrently(contents [][]byte, maxConcurrency int) (*ConcurrentProcessingStats, [][]Chunk, []error)
- type ConcurrentProcessingStats
- type ConfigMigrationResult
- type ConfigVersion
- type ContentSizeCondition
- type ContentTypeCondition
- type CreateSeparateChunkAction
- func (a *CreateSeparateChunkAction) Clone() RuleAction
- func (a *CreateSeparateChunkAction) Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
- func (a *CreateSeparateChunkAction) GetDescription() string
- func (a *CreateSeparateChunkAction) GetName() string
- func (a *CreateSeparateChunkAction) Validate() error
- type CustomStrategy
- func (s *CustomStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
- func (s *CustomStrategy) Clone() ChunkingStrategy
- func (s *CustomStrategy) GetConfig() *StrategyConfig
- func (s *CustomStrategy) GetDescription() string
- func (s *CustomStrategy) GetEnabledRuleCount() int
- func (s *CustomStrategy) GetName() string
- func (s *CustomStrategy) GetRuleCount() int
- func (s *CustomStrategy) GetRules() []*ChunkingRule
- func (s *CustomStrategy) SetConfig(config *StrategyConfig) error
- func (s *CustomStrategy) String() string
- func (s *CustomStrategy) ValidateConfig(config *StrategyConfig) error
- type CustomStrategyBuilder
- func NewContentTypeBasedStrategyBuilder(name string, separateTypes, mergeTypes []string) *CustomStrategyBuilder
- func NewCustomStrategyBuilder(name, description string) *CustomStrategyBuilder
- func NewHeadingBasedStrategyBuilder(name string, maxLevel int) *CustomStrategyBuilder
- func NewSizeBasedStrategyBuilder(name string, minSize, maxSize int) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) AddRule(name, description string, condition RuleCondition, action RuleAction, priority int) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) AddRuleObject(rule *ChunkingRule) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) Build() (ChunkingStrategy, error)
- func (b *CustomStrategyBuilder) ClearRules() *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) Clone() *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) DisableRule(name string) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) EnableRule(name string) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) GetRule(name string) *ChunkingRule
- func (b *CustomStrategyBuilder) GetRuleCount() int
- func (b *CustomStrategyBuilder) GetRules() []*ChunkingRule
- func (b *CustomStrategyBuilder) HasRule(name string) bool
- func (b *CustomStrategyBuilder) RemoveRule(name string) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) SetConfig(config *StrategyConfig) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) SetDescription(description string) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) SetName(name string) *CustomStrategyBuilder
- func (b *CustomStrategyBuilder) String() string
- func (b *CustomStrategyBuilder) Validate() error
- type DefaultErrorHandler
- func (h *DefaultErrorHandler) ClearErrors()
- func (h *DefaultErrorHandler) GetErrorCount() int
- func (h *DefaultErrorHandler) GetErrorCountByType(errorType ErrorType) int
- func (h *DefaultErrorHandler) GetErrors() []*ChunkerError
- func (h *DefaultErrorHandler) GetErrorsByType(errorType ErrorType) []*ChunkerError
- func (h *DefaultErrorHandler) HandleError(err *ChunkerError) error
- func (h *DefaultErrorHandler) HasErrors() bool
- func (h *DefaultErrorHandler) SetLogger(logger log.Logger)
- type DepthCondition
- type DocumentLevelStrategy
- func (s *DocumentLevelStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
- func (s *DocumentLevelStrategy) Clone() ChunkingStrategy
- func (s *DocumentLevelStrategy) GetConfig() *StrategyConfig
- func (s *DocumentLevelStrategy) GetDescription() string
- func (s *DocumentLevelStrategy) GetName() string
- func (s *DocumentLevelStrategy) SetConfig(config *StrategyConfig) error
- func (s *DocumentLevelStrategy) ValidateConfig(config *StrategyConfig) error
- type ElementLevelStrategy
- func (s *ElementLevelStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
- func (s *ElementLevelStrategy) Clone() ChunkingStrategy
- func (s *ElementLevelStrategy) GetConfig() *StrategyConfig
- func (s *ElementLevelStrategy) GetDescription() string
- func (s *ElementLevelStrategy) GetName() string
- func (s *ElementLevelStrategy) SetConfig(config *StrategyConfig) error
- func (s *ElementLevelStrategy) ValidateConfig(config *StrategyConfig) error
- type ErrorHandler
- type ErrorHandlingMode
- type ErrorType
- type HeadingLevelCondition
- type HierarchicalChunk
- type HierarchicalStrategy
- func (s *HierarchicalStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
- func (s *HierarchicalStrategy) Clone() ChunkingStrategy
- func (s *HierarchicalStrategy) GetConfig() *StrategyConfig
- func (s *HierarchicalStrategy) GetDescription() string
- func (s *HierarchicalStrategy) GetName() string
- func (s *HierarchicalStrategy) SetConfig(config *StrategyConfig) error
- func (s *HierarchicalStrategy) ValidateConfig(config *StrategyConfig) error
- type Image
- type ImageExtractor
- type LegacyChunkerConfig
- type Link
- type LinkExtractor
- type LogContext
- func (lc *LogContext) ToLogFields() []any
- func (lc *LogContext) WithCodeInfo(language string, lineCount int, codeBlockType string) *LogContext
- func (lc *LogContext) WithContentInfo(contentLength, textLength, wordCount int) *LogContext
- func (lc *LogContext) WithDocumentInfo(documentSize, chunkCount int) *LogContext
- func (lc *LogContext) WithHeadingInfo(level, wordCount int) *LogContext
- func (lc *LogContext) WithLinksAndImages(linksCount, imagesCount int) *LogContext
- func (lc *LogContext) WithListInfo(listType string, itemCount int) *LogContext
- func (lc *LogContext) WithMetadata(key string, value any) *LogContext
- func (lc *LogContext) WithNodeInfo(nodeType string, nodeID int) *LogContext
- func (lc *LogContext) WithPositionInfo(startLine, endLine, startCol, endCol int) *LogContext
- func (lc *LogContext) WithProcessTime(duration time.Duration) *LogContext
- func (lc *LogContext) WithTableInfo(rowCount, columnCount int, isWellFormed bool) *LogContext
- type MarkdownChunker
- func (c *MarkdownChunker) ChunkDocument(content []byte) ([]Chunk, error)
- func (c *MarkdownChunker) ClearErrors()
- func (c *MarkdownChunker) ClearStrategyCache()
- func (c *MarkdownChunker) GetAvailableStrategies() []string
- func (c *MarkdownChunker) GetCacheStats() map[string]any
- func (c *MarkdownChunker) GetCurrentStrategy() (string, string)
- func (c *MarkdownChunker) GetErrors() []*ChunkerError
- func (c *MarkdownChunker) GetErrorsByType(errorType ErrorType) []*ChunkerError
- func (c *MarkdownChunker) GetPerformanceMonitor() *PerformanceMonitor
- func (c *MarkdownChunker) GetPerformanceStats() PerformanceStats
- func (c *MarkdownChunker) GetStrategyConfig() *StrategyConfig
- func (c *MarkdownChunker) GetStrategyCount() int
- func (c *MarkdownChunker) HasErrors() bool
- func (c *MarkdownChunker) HasStrategy(strategyName string) bool
- func (c *MarkdownChunker) RegisterStrategy(strategy ChunkingStrategy) error
- func (c *MarkdownChunker) ResetPerformanceMonitor()
- func (c *MarkdownChunker) SetStrategy(strategyName string, config *StrategyConfig) error
- func (c *MarkdownChunker) UnregisterStrategy(strategyName string) error
- func (c *MarkdownChunker) UpdateStrategyConfig(config *StrategyConfig) error
- type MemoryLimiter
- type MemoryOptimizer
- func (mo *MemoryOptimizer) CheckMemoryLimit() error
- func (mo *MemoryOptimizer) ForceGC()
- func (mo *MemoryOptimizer) GetChunk() *Chunk
- func (mo *MemoryOptimizer) GetGCThreshold() int64
- func (mo *MemoryOptimizer) GetMemoryStats() MemoryOptimizerStats
- func (mo *MemoryOptimizer) GetStringBuilder() *strings.Builder
- func (mo *MemoryOptimizer) PutChunk(chunk *Chunk)
- func (mo *MemoryOptimizer) PutStringBuilder(sb *strings.Builder)
- func (mo *MemoryOptimizer) RecordProcessedBytes(bytes int64)
- func (mo *MemoryOptimizer) Reset()
- func (mo *MemoryOptimizer) SetGCThreshold(threshold int64)
- func (mo *MemoryOptimizer) SetLogger(logger log.Logger)
- type MemoryOptimizerStats
- type MergeWithParentAction
- func (a *MergeWithParentAction) Clone() RuleAction
- func (a *MergeWithParentAction) Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
- func (a *MergeWithParentAction) GetDescription() string
- func (a *MergeWithParentAction) GetName() string
- func (a *MergeWithParentAction) Validate() error
- type MetadataExtractor
- type ObjectPool
- type OptimizedStringOperations
- type PerformanceMode
- type PerformanceMonitor
- func (pm *PerformanceMonitor) CheckMemoryThresholds()
- func (pm *PerformanceMonitor) ForceGC()
- func (pm *PerformanceMonitor) GetMemoryStats() runtime.MemStats
- func (pm *PerformanceMonitor) GetStats() PerformanceStats
- func (pm *PerformanceMonitor) IsRunning() bool
- func (pm *PerformanceMonitor) RecordBytes(bytes int64)
- func (pm *PerformanceMonitor) RecordChunk(chunk *Chunk)
- func (pm *PerformanceMonitor) RecordStrategyExecution(strategyName string, executionTime time.Duration, chunksGenerated int)
- func (pm *PerformanceMonitor) Reset()
- func (pm *PerformanceMonitor) SetLogger(logger log.Logger)
- func (pm *PerformanceMonitor) Start()
- func (pm *PerformanceMonitor) Stop()
- type PerformanceStats
- type ProcessingJob
- type ProcessingResult
- type RuleAction
- type RuleCondition
- type SkipNodeAction
- type StrategyCache
- type StrategyConfig
- func CreateConfigFromParameters(strategyName string, params map[string]any) (*StrategyConfig, error)
- func DefaultStrategyConfig(name string) *StrategyConfig
- func DocumentLevelConfig() *StrategyConfig
- func DocumentLevelConfigWithSize(minSize, maxSize int) *StrategyConfig
- func ElementLevelConfig() *StrategyConfig
- func ElementLevelConfigWithSize(minSize, maxSize int) *StrategyConfig
- func ElementLevelConfigWithTypes(includeTypes, excludeTypes []string) *StrategyConfig
- func HierarchicalConfig(maxDepth int) *StrategyConfig
- func HierarchicalConfigAdvanced(maxDepth, minDepth int, mergeEmpty bool) *StrategyConfig
- func HierarchicalConfigWithSize(maxDepth, minSize, maxSize int) *StrategyConfig
- func MergeConfigs(base, override *StrategyConfig) (*StrategyConfig, error)
- type StrategyPool
- func (sp *StrategyPool) Clear()
- func (sp *StrategyPool) CreatePool(strategyName string, factory func() ChunkingStrategy)
- func (sp *StrategyPool) Get(strategyName string, factory func() ChunkingStrategy) ChunkingStrategy
- func (sp *StrategyPool) GetPoolCount() int
- func (sp *StrategyPool) HasPool(strategyName string) bool
- func (sp *StrategyPool) Put(strategy ChunkingStrategy)
- func (sp *StrategyPool) RemovePool(strategyName string)
- type StrategyRegistry
- func (sr *StrategyRegistry) Get(name string) (ChunkingStrategy, error)
- func (sr *StrategyRegistry) GetStrategyCount() int
- func (sr *StrategyRegistry) HasStrategy(name string) bool
- func (sr *StrategyRegistry) List() []string
- func (sr *StrategyRegistry) Register(strategy ChunkingStrategy) error
- func (sr *StrategyRegistry) Unregister(name string) error
- type StringBuilderPool
- type TableInfo
- type WorkerPool
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CreateMigrationGuide ¶ added in v1.2.0
func CreateMigrationGuide() string
CreateMigrationGuide 创建迁移指南
func EnsureDefaultStrategyConfig ¶ added in v1.2.0
func EnsureDefaultStrategyConfig(config *ChunkerConfig)
EnsureDefaultStrategyConfig 确保配置中有有效的策略配置
func IsLegacyConfig ¶ added in v1.2.0
func IsLegacyConfig(config any) bool
IsLegacyConfig 检查配置是否为旧版本配置
func ValidateAndFillDefaults ¶ added in v1.2.0
func ValidateAndFillDefaults(config *StrategyConfig) error
ValidateAndFillDefaults 验证策略配置并填充默认值
Types ¶
type AdvancedTableProcessor ¶
type AdvancedTableProcessor struct {
// contains filtered or unexported fields
}
AdvancedTableProcessor 高级表格处理器
func NewAdvancedTableProcessor ¶
func NewAdvancedTableProcessor(source []byte) *AdvancedTableProcessor
NewAdvancedTableProcessor 创建高级表格处理器
func (*AdvancedTableProcessor) ProcessTable ¶
func (p *AdvancedTableProcessor) ProcessTable(table *extast.Table) *TableInfo
ProcessTable 处理表格并返回详细信息
type Chunk ¶
type Chunk struct {
ID int `json:"id"`
Type string `json:"type"` // heading, paragraph, table, code, list
Content string `json:"content"` // 原始 markdown 内容
Text string `json:"text"` // 纯文本内容,用于向量化
Level int `json:"level"` // 标题层级 (仅对 heading 有效)
Metadata map[string]string `json:"metadata"`
Position ChunkPosition `json:"position"` // 在文档中的位置
Links []Link `json:"links"` // 包含的链接
Images []Image `json:"images"` // 包含的图片
Hash string `json:"hash"` // 内容哈希,用于去重
}
Chunk 表示分块后的内容
type ChunkPosition ¶
type ChunkPosition struct {
StartLine int `json:"start_line"` // 起始行号(从1开始)
EndLine int `json:"end_line"` // 结束行号(从1开始)
StartCol int `json:"start_col"` // 起始列号(从1开始)
EndCol int `json:"end_col"` // 结束列号(从1开始)
}
ChunkPosition 表示块在文档中的位置
type ChunkerConfig ¶
type ChunkerConfig struct {
// MaxChunkSize 最大块大小(字符数),0表示无限制
MaxChunkSize int
// EnabledTypes 启用的内容类型,nil表示启用所有类型
EnabledTypes map[string]bool
// CustomExtractors 自定义元数据提取器
CustomExtractors []MetadataExtractor
// ErrorHandling 错误处理模式
ErrorHandling ErrorHandlingMode
// PerformanceMode 性能模式
PerformanceMode PerformanceMode
// FilterEmptyChunks 是否过滤空块
FilterEmptyChunks bool
// PreserveWhitespace 是否保留空白字符
PreserveWhitespace bool
// MemoryLimit 内存使用限制(字节),0表示无限制
MemoryLimit int64
// EnableObjectPooling 是否启用对象池化
EnableObjectPooling bool
// 日志配置
LogLevel string `json:"log_level"` // DEBUG, INFO, WARN, ERROR
EnableLog bool `json:"enable_log"` // 是否启用日志
LogFormat string `json:"log_format"` // 日志格式 (json, console)
LogDirectory string `json:"log_directory"` // 日志文件目录
// 策略配置
ChunkingStrategy *StrategyConfig `json:"chunking_strategy,omitempty"` // 分块策略配置
}
ChunkerConfig 分块器配置
type ChunkerError ¶
type ChunkerError struct {
Type ErrorType `json:"type"`
Message string `json:"message"`
Context map[string]any `json:"context"`
Cause error `json:"cause,omitempty"`
Timestamp time.Time `json:"timestamp"`
}
ChunkerError 分块器错误
func NewChunkerError ¶
func NewChunkerError(errorType ErrorType, message string, cause error) *ChunkerError
NewChunkerError 创建新的分块器错误
func (*ChunkerError) WithContext ¶
func (e *ChunkerError) WithContext(key string, value any) *ChunkerError
WithContext 添加上下文信息
type ChunkerPool ¶
type ChunkerPool struct {
// contains filtered or unexported fields
}
ChunkerPool 分块器对象池,用于并发处理
func NewChunkerPool ¶
func NewChunkerPool(config *ChunkerConfig) *ChunkerPool
NewChunkerPool 创建新的分块器对象池
type ChunkingContext ¶ added in v1.2.0
type ChunkingContext struct {
CurrentChunk *Chunk // 当前正在处理的块
PreviousChunk *Chunk // 前一个块
ParentNode ast.Node // 父节点
Depth int // 当前深度
ChunkCount int // 已处理的块数量
TotalNodes int // 总节点数
Source []byte // 源文档内容
Chunker *MarkdownChunker // 分块器实例
CustomData map[string]any // 自定义数据
ProcessingTime time.Duration // 处理时间
}
ChunkingContext 分块上下文,提供分块过程中的状态信息
func NewChunkingContext ¶ added in v1.2.0
func NewChunkingContext(chunker *MarkdownChunker, source []byte) *ChunkingContext
NewChunkingContext 创建新的分块上下文
func (*ChunkingContext) Clone ¶ added in v1.2.0
func (ctx *ChunkingContext) Clone() *ChunkingContext
Clone 创建上下文的副本
func (*ChunkingContext) GetCustomData ¶ added in v1.2.0
func (ctx *ChunkingContext) GetCustomData(key string) (any, bool)
GetCustomData 获取自定义数据
func (*ChunkingContext) SetCustomData ¶ added in v1.2.0
func (ctx *ChunkingContext) SetCustomData(key string, value any)
SetCustomData 设置自定义数据
type ChunkingRule ¶ added in v1.2.0
type ChunkingRule struct {
Name string `json:"name"` // 规则名称
Description string `json:"description"` // 规则描述
Condition RuleCondition `json:"-"` // 规则条件(不序列化)
Action RuleAction `json:"-"` // 规则动作(不序列化)
Priority int `json:"priority"` // 规则优先级(数值越大优先级越高)
Enabled bool `json:"enabled"` // 是否启用
}
ChunkingRule 分块规则,将条件和动作组合成完整的规则
func NewChunkingRule ¶ added in v1.2.0
func NewChunkingRule(name, description string, condition RuleCondition, action RuleAction, priority int) *ChunkingRule
NewChunkingRule 创建新的分块规则
func (*ChunkingRule) Clone ¶ added in v1.2.0
func (r *ChunkingRule) Clone() *ChunkingRule
Clone 创建规则的副本
func (*ChunkingRule) Execute ¶ added in v1.2.0
func (r *ChunkingRule) Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
Execute 执行规则动作
func (*ChunkingRule) Match ¶ added in v1.2.0
func (r *ChunkingRule) Match(node ast.Node, context *ChunkingContext) bool
Match 检查规则是否匹配
func (*ChunkingRule) String ¶ added in v1.2.0
func (r *ChunkingRule) String() string
String 返回规则的字符串表示
func (*ChunkingRule) Validate ¶ added in v1.2.0
func (r *ChunkingRule) Validate() error
Validate 验证规则配置
type ChunkingStrategy ¶ added in v1.2.0
type ChunkingStrategy interface {
// GetName 返回策略名称
GetName() string
// GetDescription 返回策略描述
GetDescription() string
// ChunkDocument 使用该策略对文档进行分块
ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
// ValidateConfig 验证策略特定的配置
ValidateConfig(config *StrategyConfig) error
// Clone 创建策略的副本(用于并发安全)
Clone() ChunkingStrategy
}
ChunkingStrategy 定义分块策略的核心接口
type CodeComplexityExtractor ¶
type CodeComplexityExtractor struct{}
CodeComplexityExtractor 代码复杂度分析提取器
func (*CodeComplexityExtractor) SupportedTypes ¶
func (e *CodeComplexityExtractor) SupportedTypes() []string
SupportedTypes 返回支持的内容类型
type ConcurrentChunker ¶
type ConcurrentChunker struct {
// contains filtered or unexported fields
}
ConcurrentChunker 并发安全的分块器包装器
func NewConcurrentChunker ¶
func NewConcurrentChunker(config *ChunkerConfig) *ConcurrentChunker
NewConcurrentChunker 创建新的并发安全分块器
func (*ConcurrentChunker) ChunkDocument ¶
func (cc *ConcurrentChunker) ChunkDocument(content []byte) ([]Chunk, error)
ChunkDocument 线程安全的文档分块方法
func (*ConcurrentChunker) ChunkDocumentBatch ¶
func (cc *ConcurrentChunker) ChunkDocumentBatch(contents [][]byte, maxConcurrency int) ([][]Chunk, []error)
ChunkDocumentBatch 批量处理文档,支持并发控制
func (*ConcurrentChunker) ChunkDocumentConcurrent ¶
func (cc *ConcurrentChunker) ChunkDocumentConcurrent(contents [][]byte) ([][]Chunk, []error)
ChunkDocumentConcurrent 并发处理多个文档
func (*ConcurrentChunker) ClearErrors ¶
func (cc *ConcurrentChunker) ClearErrors()
ClearErrors 清除错误信息(线程安全)
func (*ConcurrentChunker) GetErrors ¶
func (cc *ConcurrentChunker) GetErrors() []*ChunkerError
GetErrors 获取错误信息(线程安全)
func (*ConcurrentChunker) GetPerformanceStats ¶
func (cc *ConcurrentChunker) GetPerformanceStats() PerformanceStats
GetPerformanceStats 获取性能统计信息(线程安全)
func (*ConcurrentChunker) ProcessDocumentsConcurrently ¶
func (cc *ConcurrentChunker) ProcessDocumentsConcurrently(contents [][]byte, maxConcurrency int) (*ConcurrentProcessingStats, [][]Chunk, []error)
ProcessDocumentsConcurrently 并发处理文档并收集统计信息
type ConcurrentProcessingStats ¶
type ConcurrentProcessingStats struct {
TotalDocuments int `json:"total_documents"` // 总文档数
ProcessedDocuments int `json:"processed_documents"` // 已处理文档数
FailedDocuments int `json:"failed_documents"` // 失败文档数
TotalChunks int `json:"total_chunks"` // 总块数
ProcessingTime time.Duration `json:"processing_time"` // 总处理时间
AverageTime time.Duration `json:"average_time"` // 平均处理时间
Concurrency int `json:"concurrency"` // 并发度
ThroughputDocs float64 `json:"throughput_docs"` // 文档吞吐量(文档/秒)
ThroughputChunks float64 `json:"throughput_chunks"` // 块吞吐量(块/秒)
}
ConcurrentProcessingStats 并发处理统计信息
type ConfigMigrationResult ¶ added in v1.2.0
type ConfigMigrationResult struct {
// 迁移后的配置
Config *ChunkerConfig `json:"config"`
// 是否进行了迁移
Migrated bool `json:"migrated"`
// 原始版本
OriginalVersion ConfigVersion `json:"original_version"`
// 目标版本
TargetVersion ConfigVersion `json:"target_version"`
// 迁移警告
Warnings []string `json:"warnings"`
// 迁移说明
Notes []string `json:"notes"`
}
ConfigMigrationResult 配置迁移结果
func MigrateConfig ¶ added in v1.2.0
func MigrateConfig(config any) (*ConfigMigrationResult, error)
MigrateConfig 迁移配置到最新版本。这个函数处理从旧版本配置到新版本配置的迁移。
func MigrateConfigWithLogger ¶ added in v1.2.0
func MigrateConfigWithLogger(config any, logger log.Logger) (*ConfigMigrationResult, error)
MigrateConfigWithLogger 带日志记录的配置迁移
type ConfigVersion ¶ added in v1.2.0
type ConfigVersion string
ConfigVersion 表示配置版本
const (
	// ConfigVersionV1 版本1配置(策略系统之前)
	ConfigVersionV1 ConfigVersion = "v1"
	// ConfigVersionV2 版本2配置(策略系统)
	ConfigVersionV2 ConfigVersion = "v2"
)
func GetConfigVersion ¶ added in v1.2.0
func GetConfigVersion(config any) ConfigVersion
GetConfigVersion 获取配置版本
type ContentSizeCondition ¶ added in v1.2.0
type ContentSizeCondition struct {
MinSize int `json:"min_size"` // 最小内容大小(字符数)
MaxSize int `json:"max_size"` // 最大内容大小(字符数,0表示无限制)
}
ContentSizeCondition 内容大小条件,匹配指定大小范围内的内容
func NewContentSizeCondition ¶ added in v1.2.0
func NewContentSizeCondition(minSize, maxSize int) *ContentSizeCondition
NewContentSizeCondition 创建内容大小条件
func (*ContentSizeCondition) Clone ¶ added in v1.2.0
func (c *ContentSizeCondition) Clone() RuleCondition
Clone 创建条件的副本
func (*ContentSizeCondition) GetDescription ¶ added in v1.2.0
func (c *ContentSizeCondition) GetDescription() string
GetDescription 返回条件描述
func (*ContentSizeCondition) GetName ¶ added in v1.2.0
func (c *ContentSizeCondition) GetName() string
GetName 返回条件名称
func (*ContentSizeCondition) Match ¶ added in v1.2.0
func (c *ContentSizeCondition) Match(node ast.Node, context *ChunkingContext) bool
Match 检查节点是否匹配内容大小条件
func (*ContentSizeCondition) Validate ¶ added in v1.2.0
func (c *ContentSizeCondition) Validate() error
Validate 验证条件配置
type ContentTypeCondition ¶ added in v1.2.0
type ContentTypeCondition struct {
Types []string `json:"types"` // 允许的内容类型列表
}
ContentTypeCondition 内容类型条件,匹配指定类型的内容节点
func NewContentTypeCondition ¶ added in v1.2.0
func NewContentTypeCondition(types ...string) *ContentTypeCondition
NewContentTypeCondition 创建内容类型条件
func (*ContentTypeCondition) Clone ¶ added in v1.2.0
func (c *ContentTypeCondition) Clone() RuleCondition
Clone 创建条件的副本
func (*ContentTypeCondition) GetDescription ¶ added in v1.2.0
func (c *ContentTypeCondition) GetDescription() string
GetDescription 返回条件描述
func (*ContentTypeCondition) GetName ¶ added in v1.2.0
func (c *ContentTypeCondition) GetName() string
GetName 返回条件名称
func (*ContentTypeCondition) Match ¶ added in v1.2.0
func (c *ContentTypeCondition) Match(node ast.Node, context *ChunkingContext) bool
Match 检查节点是否匹配内容类型条件
func (*ContentTypeCondition) Validate ¶ added in v1.2.0
func (c *ContentTypeCondition) Validate() error
Validate 验证条件配置
type CreateSeparateChunkAction ¶ added in v1.2.0
type CreateSeparateChunkAction struct {
ChunkType string `json:"chunk_type"` // 块类型(可选,为空时使用节点类型)
Metadata map[string]string `json:"metadata"` // 附加元数据
}
CreateSeparateChunkAction 创建独立块动作,将匹配的节点创建为独立的块
func NewCreateSeparateChunkAction ¶ added in v1.2.0
func NewCreateSeparateChunkAction(chunkType string, metadata map[string]string) *CreateSeparateChunkAction
NewCreateSeparateChunkAction 创建独立块动作
func (*CreateSeparateChunkAction) Clone ¶ added in v1.2.0
func (a *CreateSeparateChunkAction) Clone() RuleAction
Clone 创建动作的副本
func (*CreateSeparateChunkAction) Execute ¶ added in v1.2.0
func (a *CreateSeparateChunkAction) Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
Execute 执行创建独立块动作
func (*CreateSeparateChunkAction) GetDescription ¶ added in v1.2.0
func (a *CreateSeparateChunkAction) GetDescription() string
GetDescription 返回动作描述
func (*CreateSeparateChunkAction) GetName ¶ added in v1.2.0
func (a *CreateSeparateChunkAction) GetName() string
GetName 返回动作名称
func (*CreateSeparateChunkAction) Validate ¶ added in v1.2.0
func (a *CreateSeparateChunkAction) Validate() error
Validate 验证动作配置
type CustomStrategy ¶ added in v1.2.0
type CustomStrategy struct {
Name string `json:"name"` // 策略名称
Description string `json:"description"` // 策略描述
Rules []*ChunkingRule `json:"rules"` // 规则列表(按优先级排序)
Config *StrategyConfig `json:"config"` // 策略配置
// contains filtered or unexported fields
}
CustomStrategy 自定义分块策略,基于规则的自定义分块策略实现
func (*CustomStrategy) ChunkDocument ¶ added in v1.2.0
func (s *CustomStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
ChunkDocument 使用自定义策略对文档进行分块
func (*CustomStrategy) Clone ¶ added in v1.2.0
func (s *CustomStrategy) Clone() ChunkingStrategy
Clone 创建策略的副本(用于并发安全)
func (*CustomStrategy) GetConfig ¶ added in v1.2.0
func (s *CustomStrategy) GetConfig() *StrategyConfig
GetConfig 获取策略配置
func (*CustomStrategy) GetDescription ¶ added in v1.2.0
func (s *CustomStrategy) GetDescription() string
GetDescription 返回策略描述
func (*CustomStrategy) GetEnabledRuleCount ¶ added in v1.2.0
func (s *CustomStrategy) GetEnabledRuleCount() int
GetEnabledRuleCount 获取启用的规则数量
func (*CustomStrategy) GetName ¶ added in v1.2.0
func (s *CustomStrategy) GetName() string
GetName 返回策略名称
func (*CustomStrategy) GetRuleCount ¶ added in v1.2.0
func (s *CustomStrategy) GetRuleCount() int
GetRuleCount 获取规则数量
func (*CustomStrategy) GetRules ¶ added in v1.2.0
func (s *CustomStrategy) GetRules() []*ChunkingRule
GetRules 获取所有规则
func (*CustomStrategy) SetConfig ¶ added in v1.2.0
func (s *CustomStrategy) SetConfig(config *StrategyConfig) error
SetConfig 设置策略配置
func (*CustomStrategy) String ¶ added in v1.2.0
func (s *CustomStrategy) String() string
String 返回策略的字符串表示
func (*CustomStrategy) ValidateConfig ¶ added in v1.2.0
func (s *CustomStrategy) ValidateConfig(config *StrategyConfig) error
ValidateConfig 验证策略特定的配置
type CustomStrategyBuilder ¶ added in v1.2.0
type CustomStrategyBuilder struct {
Name string `json:"name"` // 策略名称
Description string `json:"description"` // 策略描述
Rules []*ChunkingRule `json:"rules"` // 规则列表
Config *StrategyConfig `json:"config"` // 策略配置
// contains filtered or unexported fields
}
CustomStrategyBuilder 自定义策略构建器,用于构建基于规则的自定义分块策略
func NewContentTypeBasedStrategyBuilder ¶ added in v1.2.0
func NewContentTypeBasedStrategyBuilder(name string, separateTypes, mergeTypes []string) *CustomStrategyBuilder
NewContentTypeBasedStrategyBuilder 创建基于内容类型的策略构建器
func NewCustomStrategyBuilder ¶ added in v1.2.0
func NewCustomStrategyBuilder(name, description string) *CustomStrategyBuilder
NewCustomStrategyBuilder 创建新的自定义策略构建器
func NewHeadingBasedStrategyBuilder ¶ added in v1.2.0
func NewHeadingBasedStrategyBuilder(name string, maxLevel int) *CustomStrategyBuilder
NewHeadingBasedStrategyBuilder 创建基于标题的策略构建器
func NewSizeBasedStrategyBuilder ¶ added in v1.2.0
func NewSizeBasedStrategyBuilder(name string, minSize, maxSize int) *CustomStrategyBuilder
NewSizeBasedStrategyBuilder 创建基于大小的策略构建器
func (*CustomStrategyBuilder) AddRule ¶ added in v1.2.0
func (b *CustomStrategyBuilder) AddRule(name, description string, condition RuleCondition, action RuleAction, priority int) *CustomStrategyBuilder
AddRule 添加分块规则
func (*CustomStrategyBuilder) AddRuleObject ¶ added in v1.2.0
func (b *CustomStrategyBuilder) AddRuleObject(rule *ChunkingRule) *CustomStrategyBuilder
AddRuleObject 添加规则对象
func (*CustomStrategyBuilder) Build ¶ added in v1.2.0
func (b *CustomStrategyBuilder) Build() (ChunkingStrategy, error)
Build 构建自定义策略
func (*CustomStrategyBuilder) ClearRules ¶ added in v1.2.0
func (b *CustomStrategyBuilder) ClearRules() *CustomStrategyBuilder
ClearRules 清空所有规则
func (*CustomStrategyBuilder) Clone ¶ added in v1.2.0
func (b *CustomStrategyBuilder) Clone() *CustomStrategyBuilder
Clone 创建构建器的副本
func (*CustomStrategyBuilder) DisableRule ¶ added in v1.2.0
func (b *CustomStrategyBuilder) DisableRule(name string) *CustomStrategyBuilder
DisableRule 禁用指定名称的规则
func (*CustomStrategyBuilder) EnableRule ¶ added in v1.2.0
func (b *CustomStrategyBuilder) EnableRule(name string) *CustomStrategyBuilder
EnableRule 启用指定名称的规则
func (*CustomStrategyBuilder) GetRule ¶ added in v1.2.0
func (b *CustomStrategyBuilder) GetRule(name string) *ChunkingRule
GetRule 获取指定名称的规则
func (*CustomStrategyBuilder) GetRuleCount ¶ added in v1.2.0
func (b *CustomStrategyBuilder) GetRuleCount() int
GetRuleCount 获取规则数量
func (*CustomStrategyBuilder) GetRules ¶ added in v1.2.0
func (b *CustomStrategyBuilder) GetRules() []*ChunkingRule
GetRules 获取所有规则
func (*CustomStrategyBuilder) HasRule ¶ added in v1.2.0
func (b *CustomStrategyBuilder) HasRule(name string) bool
HasRule 检查是否存在指定名称的规则
func (*CustomStrategyBuilder) RemoveRule ¶ added in v1.2.0
func (b *CustomStrategyBuilder) RemoveRule(name string) *CustomStrategyBuilder
RemoveRule 移除指定名称的规则
func (*CustomStrategyBuilder) SetConfig ¶ added in v1.2.0
func (b *CustomStrategyBuilder) SetConfig(config *StrategyConfig) *CustomStrategyBuilder
SetConfig 设置策略配置
func (*CustomStrategyBuilder) SetDescription ¶ added in v1.2.0
func (b *CustomStrategyBuilder) SetDescription(description string) *CustomStrategyBuilder
SetDescription 设置策略描述
func (*CustomStrategyBuilder) SetName ¶ added in v1.2.0
func (b *CustomStrategyBuilder) SetName(name string) *CustomStrategyBuilder
SetName 设置策略名称
func (*CustomStrategyBuilder) String ¶ added in v1.2.0
func (b *CustomStrategyBuilder) String() string
String 返回构建器的字符串表示
func (*CustomStrategyBuilder) Validate ¶ added in v1.2.0
func (b *CustomStrategyBuilder) Validate() error
Validate 验证策略构建器配置
type DefaultErrorHandler ¶
type DefaultErrorHandler struct {
// contains filtered or unexported fields
}
DefaultErrorHandler 默认错误处理器
func NewDefaultErrorHandler ¶
func NewDefaultErrorHandler(mode ErrorHandlingMode) *DefaultErrorHandler
NewDefaultErrorHandler 创建默认错误处理器
func NewDefaultErrorHandlerWithLogger ¶ added in v1.1.0
func NewDefaultErrorHandlerWithLogger(mode ErrorHandlingMode, logger log.Logger) *DefaultErrorHandler
NewDefaultErrorHandlerWithLogger 创建带日志器的默认错误处理器
func (*DefaultErrorHandler) ClearErrors ¶
func (h *DefaultErrorHandler) ClearErrors()
ClearErrors 清除所有错误
func (*DefaultErrorHandler) GetErrorCount ¶
func (h *DefaultErrorHandler) GetErrorCount() int
GetErrorCount 获取错误数量
func (*DefaultErrorHandler) GetErrorCountByType ¶
func (h *DefaultErrorHandler) GetErrorCountByType(errorType ErrorType) int
GetErrorCountByType 按类型获取错误数量
func (*DefaultErrorHandler) GetErrors ¶
func (h *DefaultErrorHandler) GetErrors() []*ChunkerError
GetErrors 获取所有错误
func (*DefaultErrorHandler) GetErrorsByType ¶
func (h *DefaultErrorHandler) GetErrorsByType(errorType ErrorType) []*ChunkerError
GetErrorsByType 按类型获取错误
func (*DefaultErrorHandler) HandleError ¶
func (h *DefaultErrorHandler) HandleError(err *ChunkerError) error
HandleError 处理错误
func (*DefaultErrorHandler) HasErrors ¶
func (h *DefaultErrorHandler) HasErrors() bool
HasErrors 检查是否有错误
func (*DefaultErrorHandler) SetLogger ¶ added in v1.1.0
func (h *DefaultErrorHandler) SetLogger(logger log.Logger)
SetLogger 设置日志器
type DepthCondition ¶ added in v1.2.0
type DepthCondition struct {
MinDepth int `json:"min_depth"` // 最小深度
MaxDepth int `json:"max_depth"` // 最大深度(0表示无限制)
}
DepthCondition 深度条件,匹配指定深度范围内的节点
func NewDepthCondition ¶ added in v1.2.0
func NewDepthCondition(minDepth, maxDepth int) *DepthCondition
NewDepthCondition 创建深度条件
func (*DepthCondition) Clone ¶ added in v1.2.0
func (c *DepthCondition) Clone() RuleCondition
Clone 创建条件的副本
func (*DepthCondition) GetDescription ¶ added in v1.2.0
func (c *DepthCondition) GetDescription() string
GetDescription 返回条件描述
func (*DepthCondition) GetName ¶ added in v1.2.0
func (c *DepthCondition) GetName() string
GetName 返回条件名称
func (*DepthCondition) Match ¶ added in v1.2.0
func (c *DepthCondition) Match(node ast.Node, context *ChunkingContext) bool
Match 检查节点是否匹配深度条件
func (*DepthCondition) Validate ¶ added in v1.2.0
func (c *DepthCondition) Validate() error
Validate 验证条件配置
type DocumentLevelStrategy ¶ added in v1.2.0
type DocumentLevelStrategy struct {
// contains filtered or unexported fields
}
DocumentLevelStrategy 文档级分块策略 将整个文档作为单个块处理
func NewDocumentLevelStrategy ¶ added in v1.2.0
func NewDocumentLevelStrategy() *DocumentLevelStrategy
NewDocumentLevelStrategy 创建新的文档级分块策略
func NewDocumentLevelStrategyWithConfig ¶ added in v1.2.0
func NewDocumentLevelStrategyWithConfig(config *StrategyConfig) *DocumentLevelStrategy
NewDocumentLevelStrategyWithConfig 使用指定配置创建文档级分块策略
func (*DocumentLevelStrategy) ChunkDocument ¶ added in v1.2.0
func (s *DocumentLevelStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
ChunkDocument 使用文档级策略对文档进行分块
func (*DocumentLevelStrategy) Clone ¶ added in v1.2.0
func (s *DocumentLevelStrategy) Clone() ChunkingStrategy
Clone 创建策略的副本(用于并发安全)
func (*DocumentLevelStrategy) GetConfig ¶ added in v1.2.0
func (s *DocumentLevelStrategy) GetConfig() *StrategyConfig
GetConfig 获取策略配置
func (*DocumentLevelStrategy) GetDescription ¶ added in v1.2.0
func (s *DocumentLevelStrategy) GetDescription() string
GetDescription 返回策略描述
func (*DocumentLevelStrategy) GetName ¶ added in v1.2.0
func (s *DocumentLevelStrategy) GetName() string
GetName 返回策略名称
func (*DocumentLevelStrategy) SetConfig ¶ added in v1.2.0
func (s *DocumentLevelStrategy) SetConfig(config *StrategyConfig) error
SetConfig 设置策略配置
func (*DocumentLevelStrategy) ValidateConfig ¶ added in v1.2.0
func (s *DocumentLevelStrategy) ValidateConfig(config *StrategyConfig) error
ValidateConfig 验证策略特定的配置
type ElementLevelStrategy ¶ added in v1.2.0
type ElementLevelStrategy struct {
// contains filtered or unexported fields
}
ElementLevelStrategy 元素级分块策略(默认策略)。按 Markdown 元素类型逐个分块,保持与当前行为完全一致。
func NewElementLevelStrategy ¶ added in v1.2.0
func NewElementLevelStrategy() *ElementLevelStrategy
NewElementLevelStrategy 创建新的元素级分块策略
func NewElementLevelStrategyWithConfig ¶ added in v1.2.0
func NewElementLevelStrategyWithConfig(config *StrategyConfig) *ElementLevelStrategy
NewElementLevelStrategyWithConfig 使用指定配置创建元素级分块策略
func (*ElementLevelStrategy) ChunkDocument ¶ added in v1.2.0
func (s *ElementLevelStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
ChunkDocument 使用元素级策略对文档进行分块
func (*ElementLevelStrategy) Clone ¶ added in v1.2.0
func (s *ElementLevelStrategy) Clone() ChunkingStrategy
Clone 创建策略的副本(用于并发安全)
func (*ElementLevelStrategy) GetConfig ¶ added in v1.2.0
func (s *ElementLevelStrategy) GetConfig() *StrategyConfig
GetConfig 获取策略配置
func (*ElementLevelStrategy) GetDescription ¶ added in v1.2.0
func (s *ElementLevelStrategy) GetDescription() string
GetDescription 返回策略描述
func (*ElementLevelStrategy) GetName ¶ added in v1.2.0
func (s *ElementLevelStrategy) GetName() string
GetName 返回策略名称
func (*ElementLevelStrategy) SetConfig ¶ added in v1.2.0
func (s *ElementLevelStrategy) SetConfig(config *StrategyConfig) error
SetConfig 设置策略配置
func (*ElementLevelStrategy) ValidateConfig ¶ added in v1.2.0
func (s *ElementLevelStrategy) ValidateConfig(config *StrategyConfig) error
ValidateConfig 验证策略特定的配置
type ErrorHandler ¶
type ErrorHandler interface {
// HandleError 处理错误
HandleError(err *ChunkerError) error
// GetErrors 获取所有错误
GetErrors() []*ChunkerError
// ClearErrors 清除所有错误
ClearErrors()
// HasErrors 检查是否有错误
HasErrors() bool
}
ErrorHandler 错误处理器接口
type ErrorHandlingMode ¶
type ErrorHandlingMode int
ErrorHandlingMode 错误处理模式
const (
	// ErrorModeStrict 严格模式,遇到错误立即返回
	ErrorModeStrict ErrorHandlingMode = iota
	// ErrorModePermissive 宽松模式,记录错误但继续处理
	ErrorModePermissive
	// ErrorModeSilent 静默模式,忽略错误
	ErrorModeSilent
)
type ErrorType ¶
type ErrorType int
ErrorType 错误类型
const (
	// ErrorTypeInvalidInput 无效输入错误
	ErrorTypeInvalidInput ErrorType = iota
	// ErrorTypeParsingFailed 解析失败错误
	ErrorTypeParsingFailed
	// ErrorTypeMemoryExhausted 内存不足错误
	ErrorTypeMemoryExhausted
	// ErrorTypeTimeout 超时错误
	ErrorTypeTimeout
	// ErrorTypeConfigInvalid 配置无效错误
	ErrorTypeConfigInvalid
	// ErrorTypeChunkTooLarge 块过大错误
	ErrorTypeChunkTooLarge
	// ErrorTypeStrategyNotFound 策略未找到错误
	ErrorTypeStrategyNotFound
	// ErrorTypeStrategyConfigInvalid 策略配置无效错误
	ErrorTypeStrategyConfigInvalid
	// ErrorTypeStrategyExecutionFailed 策略执行失败错误
	ErrorTypeStrategyExecutionFailed
)
type HeadingLevelCondition ¶ added in v1.2.0
type HeadingLevelCondition struct {
MinLevel int `json:"min_level"` // 最小层级(包含)
MaxLevel int `json:"max_level"` // 最大层级(包含)
}
HeadingLevelCondition 标题层级条件 匹配指定层级范围内的标题
func NewHeadingLevelCondition ¶ added in v1.2.0
func NewHeadingLevelCondition(minLevel, maxLevel int) *HeadingLevelCondition
NewHeadingLevelCondition 创建标题层级条件
func (*HeadingLevelCondition) Clone ¶ added in v1.2.0
func (c *HeadingLevelCondition) Clone() RuleCondition
Clone 创建条件的副本
func (*HeadingLevelCondition) GetDescription ¶ added in v1.2.0
func (c *HeadingLevelCondition) GetDescription() string
GetDescription 返回条件描述
func (*HeadingLevelCondition) GetName ¶ added in v1.2.0
func (c *HeadingLevelCondition) GetName() string
GetName 返回条件名称
func (*HeadingLevelCondition) Match ¶ added in v1.2.0
func (c *HeadingLevelCondition) Match(node ast.Node, context *ChunkingContext) bool
Match 检查节点是否匹配标题层级条件
func (*HeadingLevelCondition) Validate ¶ added in v1.2.0
func (c *HeadingLevelCondition) Validate() error
Validate 验证条件配置
type HierarchicalChunk ¶ added in v1.2.0
type HierarchicalChunk struct {
Chunk Chunk `json:"chunk"` // 基础块信息
Children []*HierarchicalChunk `json:"children"` // 子块列表
Parent *HierarchicalChunk `json:"-"` // 父块引用(不序列化)
Level int `json:"level"` // 层级深度
}
HierarchicalChunk 表示层级结构中的块
type HierarchicalStrategy ¶ added in v1.2.0
type HierarchicalStrategy struct {
// contains filtered or unexported fields
}
HierarchicalStrategy 层级分块策略。按文档层级结构分块,将标题及其下属内容作为一个块。
func NewHierarchicalStrategy ¶ added in v1.2.0
func NewHierarchicalStrategy() *HierarchicalStrategy
NewHierarchicalStrategy 创建新的层级分块策略
func NewHierarchicalStrategyWithConfig ¶ added in v1.2.0
func NewHierarchicalStrategyWithConfig(config *StrategyConfig) *HierarchicalStrategy
NewHierarchicalStrategyWithConfig 使用指定配置创建层级分块策略
func (*HierarchicalStrategy) ChunkDocument ¶ added in v1.2.0
func (s *HierarchicalStrategy) ChunkDocument(doc ast.Node, source []byte, chunker *MarkdownChunker) ([]Chunk, error)
ChunkDocument 使用层级策略对文档进行分块
func (*HierarchicalStrategy) Clone ¶ added in v1.2.0
func (s *HierarchicalStrategy) Clone() ChunkingStrategy
Clone 创建策略的副本(用于并发安全)
func (*HierarchicalStrategy) GetConfig ¶ added in v1.2.0
func (s *HierarchicalStrategy) GetConfig() *StrategyConfig
GetConfig 获取策略配置
func (*HierarchicalStrategy) GetDescription ¶ added in v1.2.0
func (s *HierarchicalStrategy) GetDescription() string
GetDescription 返回策略描述
func (*HierarchicalStrategy) GetName ¶ added in v1.2.0
func (s *HierarchicalStrategy) GetName() string
GetName 返回策略名称
func (*HierarchicalStrategy) SetConfig ¶ added in v1.2.0
func (s *HierarchicalStrategy) SetConfig(config *StrategyConfig) error
SetConfig 设置策略配置
func (*HierarchicalStrategy) ValidateConfig ¶ added in v1.2.0
func (s *HierarchicalStrategy) ValidateConfig(config *StrategyConfig) error
ValidateConfig 验证策略特定的配置
type Image ¶
type Image struct {
Alt string `json:"alt"` // 替代文本
URL string `json:"url"` // 图片地址
Title string `json:"title"` // 图片标题
Width string `json:"width"` // 图片宽度
Height string `json:"height"` // 图片高度
}
Image 表示图片信息
type ImageExtractor ¶
type ImageExtractor struct{}
ImageExtractor 图片提取器
func (*ImageExtractor) SupportedTypes ¶
func (e *ImageExtractor) SupportedTypes() []string
SupportedTypes 返回支持的内容类型
type LegacyChunkerConfig ¶ added in v1.2.0
type LegacyChunkerConfig struct {
// 基本配置
MaxChunkSize int `json:"max_chunk_size"`
EnabledTypes map[string]bool `json:"enabled_types"`
CustomExtractors []MetadataExtractor `json:"custom_extractors"`
ErrorHandling ErrorHandlingMode `json:"error_handling"`
PerformanceMode PerformanceMode `json:"performance_mode"`
FilterEmptyChunks bool `json:"filter_empty_chunks"`
PreserveWhitespace bool `json:"preserve_whitespace"`
MemoryLimit int64 `json:"memory_limit"`
EnableObjectPooling bool `json:"enable_object_pooling"`
// 日志配置
LogLevel string `json:"log_level"`
EnableLog bool `json:"enable_log"`
LogFormat string `json:"log_format"`
LogDirectory string `json:"log_directory"`
// 版本标识
Version ConfigVersion `json:"version,omitempty"`
}
LegacyChunkerConfig 旧版本的分块器配置(策略系统之前)
type Link ¶
type Link struct {
Text string `json:"text"` // 链接文本
URL string `json:"url"` // 链接地址
Type string `json:"type"` // 链接类型:internal, external, anchor
}
Link 表示链接信息
type LinkExtractor ¶
type LinkExtractor struct{}
LinkExtractor 链接提取器
func (*LinkExtractor) SupportedTypes ¶
func (e *LinkExtractor) SupportedTypes() []string
SupportedTypes 返回支持的内容类型
type LogContext ¶ added in v1.1.0
type LogContext struct {
FunctionName string `json:"function_name"` // 函数名
FileName string `json:"file_name"` // 文件名
LineNumber int `json:"line_number"` // 行号
NodeType string `json:"node_type"` // 节点类型
NodeID int `json:"node_id"` // 节点ID
ChunkCount int `json:"chunk_count"` // 块数量
DocumentSize int `json:"document_size"` // 文档大小
ProcessTime time.Duration `json:"process_time"` // 处理时间
Metadata map[string]any `json:"metadata"` // 额外元数据
}
LogContext 表示日志上下文信息
func NewLogContext ¶ added in v1.1.0
func NewLogContext(functionName string) *LogContext
NewLogContext 创建新的日志上下文
func (*LogContext) ToLogFields ¶ added in v1.1.0
func (lc *LogContext) ToLogFields() []any
ToLogFields 将日志上下文转换为日志字段
func (*LogContext) WithCodeInfo ¶ added in v1.1.0
func (lc *LogContext) WithCodeInfo(language string, lineCount int, codeBlockType string) *LogContext
WithCodeInfo 添加代码块特定信息到日志上下文
func (*LogContext) WithContentInfo ¶ added in v1.1.0
func (lc *LogContext) WithContentInfo(contentLength, textLength, wordCount int) *LogContext
WithContentInfo 添加内容统计信息到日志上下文
func (*LogContext) WithDocumentInfo ¶ added in v1.1.0
func (lc *LogContext) WithDocumentInfo(documentSize, chunkCount int) *LogContext
WithDocumentInfo 添加文档信息到日志上下文
func (*LogContext) WithHeadingInfo ¶ added in v1.1.0
func (lc *LogContext) WithHeadingInfo(level, wordCount int) *LogContext
WithHeadingInfo 添加标题特定信息到日志上下文
func (*LogContext) WithLinksAndImages ¶ added in v1.1.0
func (lc *LogContext) WithLinksAndImages(linksCount, imagesCount int) *LogContext
WithLinksAndImages 添加链接和图片信息到日志上下文
func (*LogContext) WithListInfo ¶ added in v1.1.0
func (lc *LogContext) WithListInfo(listType string, itemCount int) *LogContext
WithListInfo 添加列表特定信息到日志上下文
func (*LogContext) WithMetadata ¶ added in v1.1.0
func (lc *LogContext) WithMetadata(key string, value any) *LogContext
WithMetadata 添加自定义元数据到日志上下文
func (*LogContext) WithNodeInfo ¶ added in v1.1.0
func (lc *LogContext) WithNodeInfo(nodeType string, nodeID int) *LogContext
WithNodeInfo 添加节点信息到日志上下文
func (*LogContext) WithPositionInfo ¶ added in v1.1.0
func (lc *LogContext) WithPositionInfo(startLine, endLine, startCol, endCol int) *LogContext
WithPositionInfo 添加位置信息到日志上下文
func (*LogContext) WithProcessTime ¶ added in v1.1.0
func (lc *LogContext) WithProcessTime(duration time.Duration) *LogContext
WithProcessTime 添加处理时间到日志上下文
func (*LogContext) WithTableInfo ¶ added in v1.1.0
func (lc *LogContext) WithTableInfo(rowCount, columnCount int, isWellFormed bool) *LogContext
WithTableInfo 添加表格特定信息到日志上下文
type MarkdownChunker ¶
type MarkdownChunker struct {
// contains filtered or unexported fields
}
MarkdownChunker Markdown 分块器
func NewMarkdownChunker ¶
func NewMarkdownChunker() *MarkdownChunker
NewMarkdownChunker 创建新的分块器,使用默认配置。这个函数保持向后兼容性,确保现有代码无需修改即可工作。
func NewMarkdownChunkerWithConfig ¶
func NewMarkdownChunkerWithConfig(config *ChunkerConfig) *MarkdownChunker
NewMarkdownChunkerWithConfig 使用指定配置创建新的分块器
func NewMarkdownChunkerWithHierarchicalStrategy ¶ added in v1.2.0
func NewMarkdownChunkerWithHierarchicalStrategy(maxDepth int) *MarkdownChunker
NewMarkdownChunkerWithHierarchicalStrategy 创建使用层级策略的分块器。这是一个便捷函数,用于快速创建层级分块器。
func NewMarkdownChunkerWithStrategy ¶ added in v1.2.0
func NewMarkdownChunkerWithStrategy(strategyName string) *MarkdownChunker
NewMarkdownChunkerWithStrategy 使用指定策略创建新的分块器。这是一个便捷函数,用于快速创建使用特定策略的分块器。
func (*MarkdownChunker) ChunkDocument ¶
func (c *MarkdownChunker) ChunkDocument(content []byte) ([]Chunk, error)
ChunkDocument 对整个文档进行分块
func (*MarkdownChunker) ClearStrategyCache ¶ added in v1.2.0
func (c *MarkdownChunker) ClearStrategyCache()
ClearStrategyCache 清空策略缓存
func (*MarkdownChunker) GetAvailableStrategies ¶ added in v1.2.0
func (c *MarkdownChunker) GetAvailableStrategies() []string
GetAvailableStrategies 获取所有可用的策略列表
func (*MarkdownChunker) GetCacheStats ¶ added in v1.2.0
func (c *MarkdownChunker) GetCacheStats() map[string]any
GetCacheStats 获取缓存统计信息
func (*MarkdownChunker) GetCurrentStrategy ¶ added in v1.2.0
func (c *MarkdownChunker) GetCurrentStrategy() (string, string)
GetCurrentStrategy 获取当前使用的策略信息
func (*MarkdownChunker) GetErrors ¶
func (c *MarkdownChunker) GetErrors() []*ChunkerError
GetErrors 获取处理过程中的所有错误
func (*MarkdownChunker) GetErrorsByType ¶
func (c *MarkdownChunker) GetErrorsByType(errorType ErrorType) []*ChunkerError
GetErrorsByType 按类型获取错误
func (*MarkdownChunker) GetPerformanceMonitor ¶
func (c *MarkdownChunker) GetPerformanceMonitor() *PerformanceMonitor
GetPerformanceMonitor 获取性能监控器(用于高级用法)
func (*MarkdownChunker) GetPerformanceStats ¶
func (c *MarkdownChunker) GetPerformanceStats() PerformanceStats
GetPerformanceStats 获取性能统计信息
func (*MarkdownChunker) GetStrategyConfig ¶ added in v1.2.0
func (c *MarkdownChunker) GetStrategyConfig() *StrategyConfig
GetStrategyConfig 获取当前策略的配置
func (*MarkdownChunker) GetStrategyCount ¶ added in v1.2.0
func (c *MarkdownChunker) GetStrategyCount() int
GetStrategyCount 获取已注册的策略数量
func (*MarkdownChunker) HasStrategy ¶ added in v1.2.0
func (c *MarkdownChunker) HasStrategy(strategyName string) bool
HasStrategy 检查是否存在指定的策略
func (*MarkdownChunker) RegisterStrategy ¶ added in v1.2.0
func (c *MarkdownChunker) RegisterStrategy(strategy ChunkingStrategy) error
RegisterStrategy 注册新的策略
func (*MarkdownChunker) ResetPerformanceMonitor ¶
func (c *MarkdownChunker) ResetPerformanceMonitor()
ResetPerformanceMonitor 重置性能监控器
func (*MarkdownChunker) SetStrategy ¶ added in v1.2.0
func (c *MarkdownChunker) SetStrategy(strategyName string, config *StrategyConfig) error
SetStrategy 设置分块策略
func (*MarkdownChunker) UnregisterStrategy ¶ added in v1.2.0
func (c *MarkdownChunker) UnregisterStrategy(strategyName string) error
UnregisterStrategy 注销策略
func (*MarkdownChunker) UpdateStrategyConfig ¶ added in v1.2.0
func (c *MarkdownChunker) UpdateStrategyConfig(config *StrategyConfig) error
UpdateStrategyConfig 更新当前策略的配置
type MemoryLimiter ¶
type MemoryLimiter struct {
// contains filtered or unexported fields
}
MemoryLimiter 内存限制器
func NewMemoryLimiter ¶
func NewMemoryLimiter(maxMemoryBytes int64) *MemoryLimiter
NewMemoryLimiter 创建新的内存限制器
func (*MemoryLimiter) CheckMemoryLimit ¶
func (ml *MemoryLimiter) CheckMemoryLimit() error
CheckMemoryLimit 检查内存使用是否超过限制
func (*MemoryLimiter) GetCurrentMemoryUsage ¶
func (ml *MemoryLimiter) GetCurrentMemoryUsage() int64
GetCurrentMemoryUsage 获取当前内存使用量
func (*MemoryLimiter) GetMemoryLimit ¶
func (ml *MemoryLimiter) GetMemoryLimit() int64
GetMemoryLimit 获取内存限制
func (*MemoryLimiter) SetLogger ¶ added in v1.1.0
func (ml *MemoryLimiter) SetLogger(logger log.Logger)
SetLogger 设置日志器
func (*MemoryLimiter) SetMemoryLimit ¶
func (ml *MemoryLimiter) SetMemoryLimit(maxMemoryBytes int64)
SetMemoryLimit 设置内存限制
type MemoryOptimizer ¶
type MemoryOptimizer struct {
// contains filtered or unexported fields
}
MemoryOptimizer 内存优化器
func NewMemoryOptimizer ¶
func NewMemoryOptimizer(memoryLimit int64) *MemoryOptimizer
NewMemoryOptimizer 创建新的内存优化器
func (*MemoryOptimizer) CheckMemoryLimit ¶
func (mo *MemoryOptimizer) CheckMemoryLimit() error
CheckMemoryLimit 检查内存限制
func (*MemoryOptimizer) GetGCThreshold ¶
func (mo *MemoryOptimizer) GetGCThreshold() int64
GetGCThreshold 获取GC触发阈值
func (*MemoryOptimizer) GetMemoryStats ¶
func (mo *MemoryOptimizer) GetMemoryStats() MemoryOptimizerStats
GetMemoryStats 获取内存统计信息
func (*MemoryOptimizer) GetStringBuilder ¶
func (mo *MemoryOptimizer) GetStringBuilder() *strings.Builder
GetStringBuilder 获取一个字符串构建器
func (*MemoryOptimizer) PutChunk ¶
func (mo *MemoryOptimizer) PutChunk(chunk *Chunk)
PutChunk 归还块对象到池中
func (*MemoryOptimizer) PutStringBuilder ¶
func (mo *MemoryOptimizer) PutStringBuilder(sb *strings.Builder)
PutStringBuilder 归还字符串构建器到池中
func (*MemoryOptimizer) RecordProcessedBytes ¶
func (mo *MemoryOptimizer) RecordProcessedBytes(bytes int64)
RecordProcessedBytes 记录已处理的字节数
func (*MemoryOptimizer) SetGCThreshold ¶
func (mo *MemoryOptimizer) SetGCThreshold(threshold int64)
SetGCThreshold 设置GC触发阈值
func (*MemoryOptimizer) SetLogger ¶ added in v1.1.0
func (mo *MemoryOptimizer) SetLogger(logger log.Logger)
SetLogger 设置日志器
type MemoryOptimizerStats ¶
type MemoryOptimizerStats struct {
CurrentMemory int64 `json:"current_memory"` // 当前内存使用
MemoryLimit int64 `json:"memory_limit"` // 内存限制
ProcessedBytes int64 `json:"processed_bytes"` // 已处理字节数
GCThreshold int64 `json:"gc_threshold"` // GC阈值
TotalAllocations int64 `json:"total_allocations"` // 总分配内存
GCCycles int64 `json:"gc_cycles"` // GC周期数
}
MemoryOptimizerStats 内存优化器统计信息
type MergeWithParentAction ¶ added in v1.2.0
type MergeWithParentAction struct {
Separator string `json:"separator"` // 合并时使用的分隔符
}
MergeWithParentAction 与父块合并动作 将匹配的节点合并到父块中
func NewMergeWithParentAction ¶ added in v1.2.0
func NewMergeWithParentAction(separator string) *MergeWithParentAction
NewMergeWithParentAction 创建与父块合并动作
func (*MergeWithParentAction) Clone ¶ added in v1.2.0
func (a *MergeWithParentAction) Clone() RuleAction
Clone 创建动作的副本
func (*MergeWithParentAction) Execute ¶ added in v1.2.0
func (a *MergeWithParentAction) Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
Execute 执行与父块合并动作
func (*MergeWithParentAction) GetDescription ¶ added in v1.2.0
func (a *MergeWithParentAction) GetDescription() string
GetDescription 返回动作描述
func (*MergeWithParentAction) GetName ¶ added in v1.2.0
func (a *MergeWithParentAction) GetName() string
GetName 返回动作名称
func (*MergeWithParentAction) Validate ¶ added in v1.2.0
func (a *MergeWithParentAction) Validate() error
Validate 验证动作配置
type MetadataExtractor ¶
type MetadataExtractor interface {
// Extract 从AST节点中提取元数据
Extract(node ast.Node, source []byte) map[string]string
// SupportedTypes 返回支持的内容类型
SupportedTypes() []string
}
MetadataExtractor 元数据提取器接口
type OptimizedStringOperations ¶
type OptimizedStringOperations struct {
// contains filtered or unexported fields
}
OptimizedStringOperations 优化的字符串操作
func NewOptimizedStringOperations ¶
func NewOptimizedStringOperations() *OptimizedStringOperations
NewOptimizedStringOperations 创建优化的字符串操作实例
func (*OptimizedStringOperations) BuildContent ¶
func (oso *OptimizedStringOperations) BuildContent(parts ...string) string
BuildContent 优化的内容构建
func (*OptimizedStringOperations) JoinStrings ¶
func (oso *OptimizedStringOperations) JoinStrings(strs []string, separator string) string
JoinStrings 优化的字符串连接
func (*OptimizedStringOperations) TrimAndClean ¶
func (oso *OptimizedStringOperations) TrimAndClean(text string) string
TrimAndClean 优化的字符串清理
type PerformanceMode ¶
type PerformanceMode int
PerformanceMode 性能模式
const (
	// PerformanceModeDefault 默认性能模式
	PerformanceModeDefault PerformanceMode = iota
	// PerformanceModeMemoryOptimized 内存优化模式
	PerformanceModeMemoryOptimized
	// PerformanceModeSpeedOptimized 速度优化模式
	PerformanceModeSpeedOptimized
)
type PerformanceMonitor ¶
type PerformanceMonitor struct {
// contains filtered or unexported fields
}
PerformanceMonitor 性能监控器
func NewPerformanceMonitor ¶
func NewPerformanceMonitor() *PerformanceMonitor
NewPerformanceMonitor 创建新的性能监控器
func (*PerformanceMonitor) CheckMemoryThresholds ¶ added in v1.1.0
func (pm *PerformanceMonitor) CheckMemoryThresholds()
CheckMemoryThresholds 检查内存使用阈值并记录警告
func (*PerformanceMonitor) ForceGC ¶
func (pm *PerformanceMonitor) ForceGC()
ForceGC 强制垃圾回收(用于测试和内存优化)
func (*PerformanceMonitor) GetMemoryStats ¶
func (pm *PerformanceMonitor) GetMemoryStats() runtime.MemStats
GetMemoryStats 获取详细的内存统计信息
func (*PerformanceMonitor) GetStats ¶
func (pm *PerformanceMonitor) GetStats() PerformanceStats
GetStats 获取性能统计信息
func (*PerformanceMonitor) IsRunning ¶
func (pm *PerformanceMonitor) IsRunning() bool
IsRunning 检查监控器是否正在运行
func (*PerformanceMonitor) RecordBytes ¶
func (pm *PerformanceMonitor) RecordBytes(bytes int64)
RecordBytes 记录处理的字节数(用于输入文档大小)
func (*PerformanceMonitor) RecordChunk ¶
func (pm *PerformanceMonitor) RecordChunk(chunk *Chunk)
RecordChunk 记录处理的块信息
func (*PerformanceMonitor) RecordStrategyExecution ¶ added in v1.2.0
func (pm *PerformanceMonitor) RecordStrategyExecution(strategyName string, executionTime time.Duration, chunksGenerated int)
RecordStrategyExecution 记录策略执行信息
func (*PerformanceMonitor) SetLogger ¶ added in v1.1.0
func (pm *PerformanceMonitor) SetLogger(logger log.Logger)
SetLogger 设置日志器
type PerformanceStats ¶
type PerformanceStats struct {
ProcessingTime time.Duration `json:"processing_time"` // 处理时间
MemoryUsed int64 `json:"memory_used"` // 使用的内存(字节)
ChunksPerSecond float64 `json:"chunks_per_second"` // 每秒处理的块数
BytesPerSecond float64 `json:"bytes_per_second"` // 每秒处理的字节数
TotalChunks int `json:"total_chunks"` // 总块数
TotalBytes int64 `json:"total_bytes"` // 总字节数(输入文档大小)
ChunkBytes int64 `json:"chunk_bytes"` // 块内容总字节数
PeakMemory int64 `json:"peak_memory"` // 峰值内存使用
}
PerformanceStats 性能统计信息
type ProcessingResult ¶
ProcessingResult 处理结果
type RuleAction ¶ added in v1.2.0
type RuleAction interface {
// Execute 执行动作,返回处理后的块
Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
// GetName 返回动作名称
GetName() string
// GetDescription 返回动作描述
GetDescription() string
// Validate 验证动作配置是否有效
Validate() error
// Clone 创建动作的副本
Clone() RuleAction
}
RuleAction 规则动作接口 定义匹配条件后执行的动作
type RuleCondition ¶ added in v1.2.0
type RuleCondition interface {
// Match 检查节点是否匹配条件
Match(node ast.Node, context *ChunkingContext) bool
// GetName 返回条件名称
GetName() string
// GetDescription 返回条件描述
GetDescription() string
// Validate 验证条件配置是否有效
Validate() error
// Clone 创建条件的副本
Clone() RuleCondition
}
RuleCondition 规则条件接口 定义分块规则的匹配条件
type SkipNodeAction ¶ added in v1.2.0
type SkipNodeAction struct {
Reason string `json:"reason"` // 跳过原因
}
SkipNodeAction 跳过节点动作 跳过匹配的节点,不创建块
func NewSkipNodeAction ¶ added in v1.2.0
func NewSkipNodeAction(reason string) *SkipNodeAction
NewSkipNodeAction 创建跳过节点动作
func (*SkipNodeAction) Clone ¶ added in v1.2.0
func (a *SkipNodeAction) Clone() RuleAction
Clone 创建动作的副本
func (*SkipNodeAction) Execute ¶ added in v1.2.0
func (a *SkipNodeAction) Execute(node ast.Node, context *ChunkingContext) (*Chunk, error)
Execute 执行跳过节点动作
func (*SkipNodeAction) GetDescription ¶ added in v1.2.0
func (a *SkipNodeAction) GetDescription() string
GetDescription 返回动作描述
func (*SkipNodeAction) GetName ¶ added in v1.2.0
func (a *SkipNodeAction) GetName() string
GetName 返回动作名称
func (*SkipNodeAction) Validate ¶ added in v1.2.0
func (a *SkipNodeAction) Validate() error
Validate 验证动作配置
type StrategyCache ¶ added in v1.2.0
type StrategyCache struct {
// contains filtered or unexported fields
}
StrategyCache 策略缓存
func NewStrategyCache ¶ added in v1.2.0
func NewStrategyCache() *StrategyCache
NewStrategyCache 创建策略缓存
func (*StrategyCache) Get ¶ added in v1.2.0
func (sc *StrategyCache) Get(name string) (ChunkingStrategy, bool)
Get 从缓存获取策略
func (*StrategyCache) Keys ¶ added in v1.2.0
func (sc *StrategyCache) Keys() []string
Keys 获取所有缓存的策略名称
func (*StrategyCache) Put ¶ added in v1.2.0
func (sc *StrategyCache) Put(name string, strategy ChunkingStrategy)
Put 将策略放入缓存
func (*StrategyCache) Remove ¶ added in v1.2.0
func (sc *StrategyCache) Remove(name string)
Remove 从缓存移除策略
type StrategyConfig ¶ added in v1.2.0
type StrategyConfig struct {
// 通用配置
Name string `json:"name"` // 策略名称
Parameters map[string]any `json:"parameters"` // 策略参数
// 层级策略特定配置
MaxDepth int `json:"max_depth,omitempty"` // 最大层级深度
MinDepth int `json:"min_depth,omitempty"` // 最小层级深度
MergeEmpty bool `json:"merge_empty,omitempty"` // 是否合并空章节
// 大小限制配置
MinChunkSize int `json:"min_chunk_size,omitempty"` // 最小块大小
MaxChunkSize int `json:"max_chunk_size,omitempty"` // 最大块大小
// 内容过滤配置
IncludeTypes []string `json:"include_types,omitempty"` // 包含的内容类型
ExcludeTypes []string `json:"exclude_types,omitempty"` // 排除的内容类型
}
StrategyConfig 策略配置结构
func CreateConfigFromParameters ¶ added in v1.2.0
func CreateConfigFromParameters(strategyName string, params map[string]any) (*StrategyConfig, error)
CreateConfigFromParameters 从参数映射创建策略配置
func DefaultStrategyConfig ¶ added in v1.2.0
func DefaultStrategyConfig(name string) *StrategyConfig
DefaultStrategyConfig 创建默认策略配置
func DocumentLevelConfig ¶ added in v1.2.0
func DocumentLevelConfig() *StrategyConfig
DocumentLevelConfig 创建文档级策略配置
func DocumentLevelConfigWithSize ¶ added in v1.2.0
func DocumentLevelConfigWithSize(minSize, maxSize int) *StrategyConfig
DocumentLevelConfigWithSize 创建带大小限制的文档级策略配置
func ElementLevelConfig ¶ added in v1.2.0
func ElementLevelConfig() *StrategyConfig
ElementLevelConfig 创建元素级策略配置
func ElementLevelConfigWithSize ¶ added in v1.2.0
func ElementLevelConfigWithSize(minSize, maxSize int) *StrategyConfig
ElementLevelConfigWithSize 创建带大小限制的元素级策略配置
func ElementLevelConfigWithTypes ¶ added in v1.2.0
func ElementLevelConfigWithTypes(includeTypes, excludeTypes []string) *StrategyConfig
ElementLevelConfigWithTypes 创建带内容类型过滤的元素级策略配置
func HierarchicalConfig ¶ added in v1.2.0
func HierarchicalConfig(maxDepth int) *StrategyConfig
HierarchicalConfig 创建层级策略配置
func HierarchicalConfigAdvanced ¶ added in v1.2.0
func HierarchicalConfigAdvanced(maxDepth, minDepth int, mergeEmpty bool) *StrategyConfig
HierarchicalConfigAdvanced 创建高级层级策略配置
func HierarchicalConfigWithSize ¶ added in v1.2.0
func HierarchicalConfigWithSize(maxDepth, minSize, maxSize int) *StrategyConfig
HierarchicalConfigWithSize 创建带大小限制的层级策略配置
func MergeConfigs ¶ added in v1.2.0
func MergeConfigs(base, override *StrategyConfig) (*StrategyConfig, error)
MergeConfigs 合并两个策略配置
func (*StrategyConfig) Clone ¶ added in v1.2.0
func (sc *StrategyConfig) Clone() *StrategyConfig
Clone 创建策略配置的副本
func (*StrategyConfig) String ¶ added in v1.2.0
func (sc *StrategyConfig) String() string
String 返回策略配置的字符串表示
func (*StrategyConfig) ValidateConfig ¶ added in v1.2.0
func (sc *StrategyConfig) ValidateConfig() error
ValidateConfig 验证策略配置
type StrategyPool ¶ added in v1.2.0
type StrategyPool struct {
// contains filtered or unexported fields
}
StrategyPool 策略实例池
func (*StrategyPool) CreatePool ¶ added in v1.2.0
func (sp *StrategyPool) CreatePool(strategyName string, factory func() ChunkingStrategy)
CreatePool 为指定策略创建池
func (*StrategyPool) Get ¶ added in v1.2.0
func (sp *StrategyPool) Get(strategyName string, factory func() ChunkingStrategy) ChunkingStrategy
Get 从池中获取策略实例
func (*StrategyPool) GetPoolCount ¶ added in v1.2.0
func (sp *StrategyPool) GetPoolCount() int
GetPoolCount 获取池的数量
func (*StrategyPool) HasPool ¶ added in v1.2.0
func (sp *StrategyPool) HasPool(strategyName string) bool
HasPool 检查是否存在指定策略的池
func (*StrategyPool) Put ¶ added in v1.2.0
func (sp *StrategyPool) Put(strategy ChunkingStrategy)
Put 将策略实例放回池中
func (*StrategyPool) RemovePool ¶ added in v1.2.0
func (sp *StrategyPool) RemovePool(strategyName string)
RemovePool 移除指定策略的池
type StrategyRegistry ¶ added in v1.2.0
type StrategyRegistry struct {
// contains filtered or unexported fields
}
StrategyRegistry 策略注册器
func NewStrategyRegistry ¶ added in v1.2.0
func NewStrategyRegistry() *StrategyRegistry
NewStrategyRegistry 创建策略注册器
func (*StrategyRegistry) Get ¶ added in v1.2.0
func (sr *StrategyRegistry) Get(name string) (ChunkingStrategy, error)
Get 获取策略
func (*StrategyRegistry) GetStrategyCount ¶ added in v1.2.0
func (sr *StrategyRegistry) GetStrategyCount() int
GetStrategyCount 获取已注册策略数量
func (*StrategyRegistry) HasStrategy ¶ added in v1.2.0
func (sr *StrategyRegistry) HasStrategy(name string) bool
HasStrategy 检查策略是否存在
func (*StrategyRegistry) List ¶ added in v1.2.0
func (sr *StrategyRegistry) List() []string
List 列出所有可用策略
func (*StrategyRegistry) Register ¶ added in v1.2.0
func (sr *StrategyRegistry) Register(strategy ChunkingStrategy) error
Register 注册策略
func (*StrategyRegistry) Unregister ¶ added in v1.2.0
func (sr *StrategyRegistry) Unregister(name string) error
Unregister 注销策略
type StringBuilderPool ¶
type StringBuilderPool struct {
// contains filtered or unexported fields
}
StringBuilderPool 字符串构建器对象池
func NewStringBuilderPool ¶
func NewStringBuilderPool() *StringBuilderPool
NewStringBuilderPool 创建新的字符串构建器对象池
func (*StringBuilderPool) Get ¶
func (sbp *StringBuilderPool) Get() *strings.Builder
Get 从池中获取一个字符串构建器
func (*StringBuilderPool) Put ¶
func (sbp *StringBuilderPool) Put(sb *strings.Builder)
Put 将字符串构建器放回池中
type TableInfo ¶
type TableInfo struct {
Rows int
Columns int
HasHeader bool
HeaderCells []string
DataRows [][]string
Alignments []string // left, center, right
CellTypes map[string]string // 单元格内容类型分析
IsWellFormed bool
Errors []string
}
TableInfo 表格信息结构
func (*TableInfo) GetTableMetadata ¶
GetTableMetadata 获取表格元数据
type WorkerPool ¶
type WorkerPool struct {
// contains filtered or unexported fields
}
WorkerPool 工作池,用于更精细的并发控制
func NewWorkerPool ¶
func NewWorkerPool(workers int, config *ChunkerConfig) *WorkerPool
NewWorkerPool 创建新的工作池
func (*WorkerPool) ProcessBatch ¶
func (wp *WorkerPool) ProcessBatch(contents [][]byte) ([][]Chunk, []error)
ProcessBatch 批量处理任务