Documentation
¶
Overview ¶
pkg/compress/compress.go
pkg/compress/compress_chunked.go
pkg/compress/compress_dict.go
pkg/compress/compress_xz.go
pkg/compress/compress_zip.go
pkg/compress/errors.go
pkg/compress/gitignore.go
pkg/compress/options.go
pkg/compress/pools.go
pkg/compress/progress.go
pkg/compress/result.go
Index ¶
- Constants
- Variables
- func FormatSize(bytes uint64) string
- func FormatSummary(result *Result, opts *Options) string
- func TruncateLeft(path string, maxLen int) string
- type EventType
- type Options
- type Parallelism
- type ProgressCallback
- type ProgressEvent
- type Result
- func (r *Result) CompressionRatio() float64
- func (r *Result) DedupRatio() float64
- func (r *Result) GetCompressedSize() uint64
- func (r *Result) GetErrors() []error
- func (r *Result) GetFilesProcessed() int
- func (r *Result) GetFilesTotal() int
- func (r *Result) GetOriginalSize() uint64
- func (r *Result) Success() bool
Constants ¶
const (
	// MinDictSize is the minimum dictionary size required by zstd encoder
	// The zstd library uses internal history buffers that require at least 32KB
	MinDictSize = 32 * 1024

	// MaxDictSize is the maximum useful dictionary size
	MaxDictSize = 112 * 1024

	// MinSampleSizeForDict is the minimum individual sample size for dictionary training
	// Small samples are fine - the library handles them well
	// Only skip truly tiny samples that add noise without useful patterns
	MinSampleSizeForDict = 64
)
Variables ¶
var (
	// ErrInputRequired is returned when input path is not specified
	ErrInputRequired = errors.New("input path is required")

	// ErrInvalidLevelZstd is returned when zstd compression level is out of range
	ErrInvalidLevelZstd = errors.New("compression level for GDELTA (zstd) must be between 1 and 22")

	// ErrInvalidLevelZip is returned when zip compression level is out of range
	ErrInvalidLevelZip = errors.New("compression level for ZIP (deflate) must be between 1 and 9")

	// ErrNoFiles is returned when no files are found to compress
	ErrNoFiles = errors.New("no regular files found to compress")

	// ErrZipNoChunking is returned when trying to use chunking with ZIP format
	ErrZipNoChunking = errors.New("chunk-based deduplication is not supported in ZIP format")

	// ErrZipNoDictionary is returned when trying to use dictionary with ZIP format
	ErrZipNoDictionary = errors.New("dictionary compression is not supported in ZIP format")

	// ErrXzNoChunking is returned when trying to use chunking with XZ format
	ErrXzNoChunking = errors.New("chunk-based deduplication is not supported in XZ format")

	// ErrXzNoDictionary is returned when trying to use dictionary with XZ format
	ErrXzNoDictionary = errors.New("dictionary compression is not supported in XZ format")

	// ErrXzNoZip is returned when trying to use both XZ and ZIP formats
	ErrXzNoZip = errors.New("cannot use both XZ and ZIP formats")

	// ErrInvalidLevelXz is returned when XZ compression level is out of range
	ErrInvalidLevelXz = errors.New("compression level for XZ (LZMA2) must be between 1 and 9")

	// ErrDictionaryNoChunking is returned when trying to use both dictionary and chunking
	ErrDictionaryNoChunking = errors.New("dictionary compression cannot be combined with chunking")

	// ErrInvalidParallelism is returned when parallelism strategy is invalid
	ErrInvalidParallelism = errors.New("parallelism must be 'auto', 'folder', or 'file'")

	// ErrChunkSizeTooSmall is returned when chunk size is below minimum
	ErrChunkSizeTooSmall = errors.New("chunk size must be at least 4KB (4096 bytes)")

	// ErrChunkSizeTooLarge is returned when chunk size exceeds reasonable maximum
	ErrChunkSizeTooLarge = errors.New("chunk size must not exceed 64MB (67108864 bytes)")
)
Functions ¶
func FormatSize ¶ added in v0.0.5
FormatSize formats bytes into human-readable string
func FormatSummary ¶ added in v0.0.5
FormatSummary formats a compression result into a human-readable summary string
func TruncateLeft ¶ added in v0.0.5
TruncateLeft truncates a path from the left to fit maxLen, preserving the filename
Types ¶
type Options ¶
type Options struct {
// Input path (file or directory)
// Ignored if Files is provided
InputPath string
// Files allows library users to provide a custom list of files/folders to compress
// When set, InputPath is ignored
// Each path can be absolute or relative, file or directory
// This option is for library use only (not exposed in CLI)
Files []string
// Output archive path
OutputPath string
// Maximum number of concurrent compression threads
// Default: runtime.NumCPU()
MaxThreads int
// Parallelism strategy: "auto", "folder", or "file"
// Default: "auto"
Parallelism Parallelism
// Maximum memory per thread before flushing to disk (bytes)
// 0 = unlimited (flush only at folder boundaries)
// Default: 0
MaxThreadMemory uint64
// Chunk size for content-based deduplication (bytes)
// 0 = disabled (traditional file-level compression)
// Default: 0
ChunkSize uint64
// Maximum chunk store size in MB (bounds memory usage for deduplication)
// Calculated as: maxChunks = ChunkStoreSize / (ChunkSize / 1MB)
// 0 = unlimited (store all unique chunks)
// Default: 0
ChunkStoreSize uint64
// Compression level (1-22 for zstd, 1-9 for zip deflate)
// 1=fastest, 9=balanced, 19+=maximum compression (zstd only)
// Default: 5
Level int
// UseZipFormat creates a standard ZIP archive instead of GDELTA format
// Uses Deflate compression (universally compatible)
// Cannot be combined with ChunkSize (deduplication not supported in ZIP mode)
// Default: false
UseZipFormat bool
// UseXzFormat creates standard .tar.xz archives instead of GDELTA format
// Uses LZMA2 compression (best compression ratio, slower than zstd)
// Cannot be combined with ChunkSize or UseDictionary
// Default: false
UseXzFormat bool
// UseDictionary enables GDELTA03 dictionary-based compression
// Trains a zstd dictionary from input files for better compression
// Especially effective for many small files with common patterns
// Cannot be combined with ChunkSize or UseZipFormat
// Default: false
UseDictionary bool
// DryRun simulates compression without writing
DryRun bool
// Verbose enables detailed logging
Verbose bool
// ProgressWriter receives progress updates (optional)
// If nil and Quiet=false, progress goes to stdout
ProgressWriter io.Writer
// Quiet suppresses all output except errors
Quiet bool
// UseGitignore respects .gitignore files to exclude matching paths
UseGitignore bool
// DisableGC disables garbage collection during compression for maximum
// throughput. Uses pooled buffers to minimize allocations. GC is re-enabled
// after compression completes. Only affects ZIP compression mode.
// Default: false
DisableGC bool
}
Options configures the compression behavior
func DefaultOptions ¶
func DefaultOptions() *Options
DefaultOptions returns options with sensible defaults
type Parallelism ¶ added in v0.0.10
type Parallelism string
Parallelism defines the parallelism strategy
const (
	// ParallelismAuto auto-detects based on input structure
	// Uses folder mode if enough folders, file mode otherwise
	ParallelismAuto Parallelism = "auto"

	// ParallelismFolder processes whole folders per worker (original behavior)
	// Best when: many folders with few files each
	ParallelismFolder Parallelism = "folder"

	// ParallelismFile processes individual files per worker with folder affinity
	// Files from same folder go to same worker for locality
	// Best when: flat directories or few folders with many files
	ParallelismFile Parallelism = "file"
)
type ProgressCallback ¶
type ProgressCallback func(event ProgressEvent)
ProgressCallback is called for various progress events
func ProgressBarCallback ¶ added in v0.0.5
func ProgressBarCallback() (ProgressCallback, *mpb.Progress)
ProgressBarCallback creates a progress callback that displays multi-progress bars Returns the callback function and the progress container (call Wait() after compression)
type ProgressEvent ¶
type ProgressEvent struct {
Type EventType
FilePath string
Current int64
Total int64
CurrentBytes uint64
TotalBytes uint64
CompressedSize uint64
}
ProgressEvent contains progress information
type Result ¶
type Result struct {
// Total number of files found
FilesTotal int
// Number of files successfully compressed
FilesProcessed int
// Total original size in bytes
OriginalSize uint64
// Total compressed size in bytes
CompressedSize uint64
// ChunkSize is the configured chunk size (0 if chunking disabled)
ChunkSize uint64
// Chunk deduplication statistics (when chunking enabled)
TotalChunks uint64 // Total chunks processed
UniqueChunks uint64 // Unique chunks stored
DedupedChunks uint64 // Chunks that were deduplicated
BytesSaved uint64 // Bytes saved through deduplication
Evictions uint64 // Chunks evicted from LRU cache (doesn't affect archive)
// List of errors encountered (non-fatal)
Errors []error
}
Result contains statistics about the compression operation
func Compress ¶
func Compress(opts *Options, progressCb ProgressCallback) (*Result, error)
Compress compresses files from inputPath into an archive at outputPath
func (*Result) CompressionRatio ¶
CompressionRatio returns the compression ratio as a percentage
func (*Result) DedupRatio ¶ added in v0.0.2
DedupRatio returns the deduplication ratio as a percentage
func (*Result) GetCompressedSize ¶ added in v0.0.5
GetCompressedSize returns compressed size (interface method)
func (*Result) GetFilesProcessed ¶ added in v0.0.5
GetFilesProcessed returns processed files (interface method)
func (*Result) GetFilesTotal ¶ added in v0.0.5
GetFilesTotal returns total files (interface method)
func (*Result) GetOriginalSize ¶ added in v0.0.5
GetOriginalSize returns original size (interface method)