Documentation ¶
Index ¶
- Constants
- Variables
- func Bam2Bundles(inBam string, outDir string, minBundle int, nrProcBam int, quiet, silent bool)
- func BamToolAccStats(p *BamToolParams)
- func BamToolAlnContext(p *BamToolParams)
- func BamToolDump(p *BamToolParams)
- func BamToolbox(toolYaml string, inFile string, outFile string, quiet bool, silent bool, ...)
- func BashExec(command string)
- func CountReads(bamReader *bam.Reader, bamWriter *bam.Writer, countFile string, field string, ...)
- func Execute()
- func FasLinesToSimpleSeq(lines FqLines) (*simpleSeq, error)
- func FileExists(fn string) bool
- func FileSize(file string) int
- func FqLinesToSimpleSeq(lines FqLines, qBase int, gaps bool) (*simpleSeq, error)
- func GetSamAcc(r *sam.Record) float64
- func GetSamDump(field string, r *sam.Record) string
- func GetSamEndPos(r *sam.Record) int
- func GetSamHardClipped(r *sam.Record) int
- func GetSamIsSec(r *sam.Record) int
- func GetSamIsSup(r *sam.Record) int
- func GetSamLeftClip(r *sam.Record) int
- func GetSamLeftHardClip(r *sam.Record) int
- func GetSamLeftSoftClip(r *sam.Record) int
- func GetSamLeftSoftClipSeq(r *sam.Record) string
- func GetSamMapQual(r *sam.Record) int
- func GetSamMapped(r *sam.Record) bool
- func GetSamMeanBaseQual(r *sam.Record) float64
- func GetSamName(r *sam.Record) string
- func GetSamPos(r *sam.Record) int
- func GetSamReadAln(r *sam.Record) int
- func GetSamReadAlnSeq(r *sam.Record) string
- func GetSamReadCov(r *sam.Record) float64
- func GetSamReadLen(r *sam.Record) int
- func GetSamReadSeq(r *sam.Record) string
- func GetSamRef(r *sam.Record) string
- func GetSamRefAln(r *sam.Record) int
- func GetSamRefCov(r *sam.Record) float64
- func GetSamRefLen(r *sam.Record) int
- func GetSamReverse(r *sam.Record) bool
- func GetSamRightClip(r *sam.Record) int
- func GetSamRightHardClip(r *sam.Record) int
- func GetSamRightSoftClip(r *sam.Record) int
- func GetSamRightSoftClipSeq(r *sam.Record) string
- func GetSamStrand(r *sam.Record) int
- func IsPidAlive(pid int) bool
- func LaunchFxWatchers(dirs []string, ctrlChan WatchCtrlChan, re *regexp.Regexp, inFmt, outFmt string, ...)
- func ListTools(p *BamToolParams)
- func MaxInts(s []int) (m int)
- func MinInts(s []int) (m int)
- func NewAnonLinearSeq(s string) *linear.Seq
- func NewBamReader(bamFile string, nrProc int) *bam.Reader
- func NewBamReaderChan(inFile string, cp int, buff int, threads int) (chan *sam.Record, *bam.Reader)
- func NewBamSinkChan(cp int) (chan *sam.Record, chan bool)
- func NewBamWriterChan(inFile string, head *sam.Header, cp int, buff int, threads int) (chan *sam.Record, chan bool)
- func NewFxWatcher(dir string, seqChan chan *simpleSeq, ...)
- func NewRawFastaStream(name string, inFh *xopen.Reader, inReader *bufio.Reader, ...) chan *simpleSeq
- func NewRawFastqStream(name string, inFh *xopen.Reader, inReader *bufio.Reader, ...) chan *simpleSeq
- func NewRawSeqStreamFromFile(inFastq string, seqChan chan *simpleSeq, qBase int, format string, ...) (chan SeqStreamCtrl, chan SeqStreamCtrl)
- func NewSAMRecordFromAln(name string, ref *sam.Reference, refStart, refEnd, queryStart, queryEnd int, ...) (*sam.Record, error)
- func ParseByteSize(val string) (int64, error)
- func PrintTsvLine(fields []string) string
- func RevCompDNA(s string) string
- func ReverseInt(d []int) []int
- func SamDumper(fields []string, r *sam.Record) []string
- func SubLocationFlanking(length, B, E, begin, end int, strictMode bool) (int, int, bool)
- func SubLocationInner(length, B, E, begin, end int, strictMode bool) (int, int, bool)
- func SumInts(s []int) (r int)
- func ValidateSeq(seq *simpleSeq, gaps bool) error
- type AlignedSeq
- type AlnDetails
- type AlnParams
- type AmpliconFinder
- func (finder *AmpliconFinder) Locate() ([]int, []int, error)
- func (finder *AmpliconFinder) LocateRange(begin, end int, flanking bool, strictMode bool) ([]int, []int, error)
- func (finder *AmpliconFinder) Location() ([]int, []int, error)
- func (finder *AmpliconFinder) Reset(sequence []byte, maxMismatch int) error
- type BamTool
- type BamToolParams
- type BedFeature
- type ColorCycler
- type Config
- type FqLine
- type FqLines
- type FqlState
- type FxWatcher
- type Locus
- type Queries
- type Query
- type Range
- type Ranges
- type ReadCounts
- type RecordLoopBuffer
- type RecordNode
- type RefCounts
- type RefWithFaidx
- type Reference
- type Scorer
- type SeqColorizer
- func (p *SeqColorizer) Color(seq []byte) []byte
- func (p *SeqColorizer) ColorAmino(seq []byte) []byte
- func (p *SeqColorizer) ColorNucleic(seq []byte) []byte
- func (p *SeqColorizer) ColorNucleicWithQuals(seq []byte, quals []byte) []byte
- func (p *SeqColorizer) ColorQuals(quals []byte) []byte
- func (p *SeqColorizer) ColorWithQuals(seq []byte, quals []byte) []byte
- func (p *SeqColorizer) WrapWriter(fh *os.File) io.Writer
- type SeqDetector
- type SeqStreamCtrl
- type SumResult
- type Toolshed
- type TopBuffer
- type WatchCtrl
- type WatchCtrlChan
- type WatchedFx
- type WatchedFxPool
Constants ¶
const BIG_SLEEP = 100 * time.Millisecond
const MICRO_SLEEP = time.Millisecond
const NAP_SLEEP = 10 * time.Millisecond
const VERSION = "2.8.0"
VERSION of seqkit
Variables ¶
var IUPACAminoAcids, _ = asciiset.MakeASCIISet("ACDEFGHIKLMNPQRSTVWY")
var IUPACBases, _ = asciiset.MakeASCIISet("ACGTRYSWKMBDHVNUacgtryswkmbdhvnu")
var RootCmd = &cobra.Command{ Use: "seqkit", Short: "a cross-platform and ultrafast toolkit for FASTA/Q file manipulation", Long: fmt.Sprintf(`SeqKit -- a cross-platform and ultrafast toolkit for FASTA/Q file manipulation Version: %s Author: Wei Shen <shenwei356@gmail.com> Documents : http://bioinf.shenwei.me/seqkit Source code: https://github.com/shenwei356/seqkit Please cite: https://doi.org/10.1371/journal.pone.0163962 Seqkit utlizies the pgzip (https://github.com/klauspost/pgzip) package to read and write gzip file, and the outputted gzip file would be slighty larger than files generated by GNU gzip. Seqkit writes gzip files very fast, much faster than the multi-threaded pigz, therefore there's no need to pipe the result to gzip/pigz. Seqkit also supports reading and writing xz (.xz) and zstd (.zst) formats since v2.2.0. Bzip2 format is supported since v2.4.0. Compression level: format range default comment gzip 1-9 5 https://github.com/klauspost/pgzip sets 5 as the default value. xz NA NA https://github.com/ulikunitz/xz does not support. zstd 1-4 2 roughly equals to zstd 1, 3, 7, 11, respectively. bzip 1-9 6 https://github.com/dsnet/compress `, VERSION), }
RootCmd represents the base command when called without any subcommands
var Threads = runtime.NumCPU()
Threads for bread.NewBufferedReader()
Functions ¶
func Bam2Bundles ¶
func BamToolAccStats ¶
func BamToolAccStats(p *BamToolParams)
func BamToolAlnContext ¶
func BamToolAlnContext(p *BamToolParams)
func BamToolDump ¶
func BamToolDump(p *BamToolParams)
func BamToolbox ¶
func CountReads ¶
func CountReads(bamReader *bam.Reader, bamWriter *bam.Writer, countFile string, field string, rangeMin, rangeMax float64, printPass bool, printPrim bool, printLog bool, printBins int, binMode string, mapQual int, printFreq int, printDump bool, printDelay int, printPdf string, execBefore, execAfter string, includeIds map[string]bool, excludeIds map[string]bool, printQuiet bool)
CountReads counts total, secondary and supplementary reads mapped to each reference.
func Execute ¶
func Execute()
Execute adds all child commands to the root command and sets flags appropriately. This is called by main.main(). It only needs to happen once to the rootCmd.
func FasLinesToSimpleSeq ¶
FasLinesToSimpleSeq attempts to construct a valid sequence record from a buffer of parsed lines.
func FileExists ¶
FileExists checks if a file exists by calling os.Stat.
func FqLinesToSimpleSeq ¶
FqLinesToSimpleSeq attempts to construct a valid fastq record from a buffer of parsed lines.
func GetSamEndPos ¶
func GetSamHardClipped ¶
func GetSamIsSec ¶
func GetSamIsSup ¶
func GetSamLeftClip ¶
func GetSamLeftHardClip ¶
func GetSamLeftSoftClip ¶
func GetSamLeftSoftClipSeq ¶
func GetSamMapQual ¶
func GetSamMapped ¶
func GetSamMeanBaseQual ¶
func GetSamName ¶
func GetSamReadAln ¶
func GetSamReadAlnSeq ¶
func GetSamReadCov ¶
func GetSamReadLen ¶
func GetSamReadSeq ¶
func GetSamRefAln ¶
func GetSamRefCov ¶
func GetSamRefLen ¶
func GetSamReverse ¶
func GetSamRightClip ¶
func GetSamRightHardClip ¶
func GetSamRightSoftClip ¶
func GetSamRightSoftClipSeq ¶
func GetSamStrand ¶
func IsPidAlive ¶
func LaunchFxWatchers ¶
func LaunchFxWatchers(dirs []string, ctrlChan WatchCtrlChan, re *regexp.Regexp, inFmt, outFmt string, qBase int, allowGaps bool, delta int, timeout string, dropString string, waitPid int, findOnly bool, outw *xopen.Writer)
LaunchFxWatchers launches fastx watcher goroutines on multiple input directories.
func ListTools ¶
func ListTools(p *BamToolParams)
func NewAnonLinearSeq ¶
NewAnonLinearSeq makes a new anonymous linear.Seq.
func NewBamReader ¶
NewBamReader creates a new BAM reader from file.
func NewBamReaderChan ¶
func NewBamWriterChan ¶
func NewFxWatcher ¶
func NewFxWatcher(dir string, seqChan chan *simpleSeq, watcherCtrlChanIn, watcherCtrlChanOut WatchCtrlChan, re *regexp.Regexp, inFmt, outFmt string, qBase int, allowGaps bool, minDelta int, dropString string, findOnly bool)
NewFxWatcher streams records from fastx files under a directory.
func NewRawFastaStream ¶
func NewRawFastaStream(name string, inFh *xopen.Reader, inReader *bufio.Reader, seqChan chan *simpleSeq, id string, ctrlChanIn, ctrlChanOut chan SeqStreamCtrl, gaps bool) chan *simpleSeq
NewRawFastaStream initializes a new channel for reading fasta records in a robust way.
func NewRawFastqStream ¶
func NewRawFastqStream(name string, inFh *xopen.Reader, inReader *bufio.Reader, seqChan chan *simpleSeq, qBase int, id string, ctrlChanIn, ctrlChanOut chan SeqStreamCtrl, gaps bool) chan *simpleSeq
NewRawFastqStream initializes a new channel for reading fastq records in a robust way.
func NewRawSeqStreamFromFile ¶
func NewRawSeqStreamFromFile(inFastq string, seqChan chan *simpleSeq, qBase int, format string, allowGaps bool) (chan SeqStreamCtrl, chan SeqStreamCtrl)
NewRawSeqStreamFromFile initializes a new channel for reading fastq records from a file in a robust way.
func NewSAMRecordFromAln ¶
func NewSAMRecordFromAln(name string, ref *sam.Reference, refStart, refEnd, queryStart, queryEnd int, refAln, queryAln string, strand string, mapQ byte, seq string, qual []byte, aux []sam.Aux) (*sam.Record, error)
NewSAMRecordFromAln builds a new SAM record based on the provided local alignment and its reference/query coordinates.
func ParseByteSize ¶
ParseByteSize parses byte size from string
func PrintTsvLine ¶
func RevCompDNA ¶
RevCompDNA reverse complements a DNA sequence string.
func SubLocationFlanking ¶
SubLocationFlanking returns location of a flanking range (begin:end, relative to amplicon). B/E: 0-based, location of amplicon. begin/end: 1-based, begin: relative location to 5' end of amplicon, end: relative location to 3' end of amplicon. Returned locations are 1-based.
F -----===============----- -3-1 x/y 1 3 5 x/y F R -----=====-----=====----- ===== -5:-1 === -5:-3 ===== 1:5 === 3:5 ================= -1:1 ========================= -5:5 x:-y (invalid)
func SubLocationInner ¶
SubLocationInner returns location of a range (begin:end, relative to amplicon). B/E: 0-based, location of amplicon. begin/end: 1-based, begin: relative location to 5' end of amplicon, end: relative location to 3' end of amplicon. Returned locations are 1-based.
F -----===============----- 1 3 5 x/y -5-3-1 x/y F R -----=====-----=====----- x:y =============== 1:-1 ======= 1:7 ===== 3:7 ===== 6:10 ===== -10:-6 ===== -7:-3 -x:y (invalid)
func ValidateSeq ¶
ValidateSeq validates simpleSeq objects.
Types ¶
type AlignedSeq ¶
type AlignedSeq struct { Ref *Reference Query *Query QueryAln string RefAln string RefStart int RefEnd int QueryStart int QueryEnd int Score float64 Best bool Detector *SeqDetector }
AlignedSeq holds alignment results.
func AlignInfo ¶
func AlignInfo(r *Reference, q *Query, f []feat.Pair) *AlignedSeq
AlignInfo constructs an *AlignedSeq structure based on raw alignment results.
func PairwiseAlignSW ¶
func PairwiseAlignSW(r *Reference, q *Query, alnParams *AlnParams) *AlignedSeq
PairwiseAlignSW performs pairwise local alignment of two sequences using the biogo implementation of the Smith-Waterman algorithm.
func (*AlignedSeq) AlnString ¶
func (a *AlignedSeq) AlnString() string
func (*AlignedSeq) Fields ¶
func (a *AlignedSeq) Fields() []string
Fields returns the fields of AlignedSeq in a defined order.
func (*AlignedSeq) String ¶
func (a *AlignedSeq) String() string
String generates a string representation of an *AlignedSeq.
type AlnDetails ¶
type AlnDetails struct { Match int Mismatch int MatchMismatch int Insertion int Deletion int Skip int Len int Acc float64 WAcc float64 }
func GetSamAlnDetails ¶
func GetSamAlnDetails(r *sam.Record) *AlnDetails
type AmpliconFinder ¶
type AmpliconFinder struct { Seq []byte F []byte // Forward primer R []byte // R should be reverse complementary sequence of reverse primer MaxMismatch int FMindex *fmi.FMIndex // contains filtered or unexported fields }
AmpliconFinder is a struct for locating amplicon via primer(s).
func NewAmpliconFinder ¶
func NewAmpliconFinder(sequence, forwardPrimer, reversePrimerRC []byte, maxMismatch int) (*AmpliconFinder, error)
NewAmpliconFinder returns an AmpliconFinder struct.
func (*AmpliconFinder) Locate ¶
func (finder *AmpliconFinder) Locate() ([]int, []int, error)
Locate returns the location of the amplicon. Locations are 1-based; nil is returned if not found.
func (*AmpliconFinder) LocateRange ¶
func (finder *AmpliconFinder) LocateRange(begin, end int, flanking bool, strictMode bool) ([]int, []int, error)
LocateRange returns location of the range (begin:end, 1-based).
type BamTool ¶
type BamTool struct { Name string Desc string Use func(params *BamToolParams) }
type BamToolParams ¶
type BedFeature ¶
type BedFeature struct { Chr string Start int // 1based End int // end included Name *string Strand *string }
BedFeature is the BED feature struct
func ReadBedFeatures ¶
func ReadBedFeatures(file string) ([]BedFeature, error)
ReadBedFeatures returns BED features of a file
func ReadBedFilteredFeatures ¶
func ReadBedFilteredFeatures(file string, chrs []string) ([]BedFeature, error)
ReadBedFilteredFeatures returns BED features of selected chrs from file
type ColorCycler ¶
ColorCycler is a utility object to cycle between colors and colorize text.
func NewColorCycler ¶
func NewColorCycler(dummy bool) *ColorCycler
NewColorCycler returns a new color cycler object.
func PrettyPrintTsv ¶
PrettyPrintTsv pretty prints and optionally colorizes a "data frame".
func (*ColorCycler) Colorize ¶
func (p *ColorCycler) Colorize(s string) string
Colorize adds the current ANSI color to the text.
func (*ColorCycler) Fancy ¶
func (p *ColorCycler) Fancy(s string, head bool) string
Fancy colorizes text with normal or header styles.
func (*ColorCycler) Header ¶
func (p *ColorCycler) Header(s string) string
Header adds the current ANSI color to the text with a header style.
func (*ColorCycler) WrapWriter ¶
func (p *ColorCycler) WrapWriter(fh *os.File) io.Writer
WrapWriter wraps a file into a go-colorable object if necessary.
type Config ¶
type Config struct { Alphabet *seq.Alphabet ChunkSize int BufferSize int Threads int LineWidth int IDRegexp string IDNCBI bool OutFile string Quiet bool AlphabetGuessSeqLength int ValidateSeqLength int CompressionLevel int }
Config holds the global flags
type FxWatcher ¶
type FxWatcher struct { Base string Pool *WatchedFxPool }
type ReadCounts ¶
type ReadCounts []*RefCounts
ReadCounts holds read counts for all references.
func NewReadCounts ¶
func NewReadCounts(refs []*sam.Reference) ReadCounts
NewReadCounts initializes a new read count slice.
func (ReadCounts) Sorted ¶
func (c ReadCounts) Sorted() ReadCounts
Sorted creates a sorted copy of a read counts slice.
type RecordLoopBuffer ¶
type RecordLoopBuffer struct {
Size, Capacity int
Current *RecordNode
}
RecordLoopBuffer is a loop buffer for FASTA/Q records
func NewRecordLoopBuffer ¶
func NewRecordLoopBuffer(capacity int) (*RecordLoopBuffer, error)
NewRecordLoopBuffer creates a new RecordLoopBuffer object with a certain capacity
func (*RecordLoopBuffer) Add ¶
func (buf *RecordLoopBuffer) Add(value *fastx.Record)
Add adds a new RecordNode
func (*RecordLoopBuffer) Backward ¶
func (buf *RecordLoopBuffer) Backward(n int)
Backward moves the current pointer backward N nodes
func (*RecordLoopBuffer) Next ¶
func (buf *RecordLoopBuffer) Next() *RecordNode
Next returns next node
func (*RecordLoopBuffer) Prev ¶
func (buf *RecordLoopBuffer) Prev() *RecordNode
Prev returns previous node
type RecordNode ¶
RecordNode is the node of the doubly linked loop list
func (RecordNode) String ¶
func (node RecordNode) String() string
type RefWithFaidx ¶
type RefWithFaidx struct { Fasta string IdxFile string Cache bool // contains filtered or unexported fields }
func NewRefWitdFaidx ¶
func NewRefWitdFaidx(file string, cache bool, quiet bool) *RefWithFaidx
type Reference ¶
Reference holds information about a reference sequence along with the target ranges.
type Scorer ¶
type Scorer interface {
Score() int
}
Scorer is an interface for getting alignment score.
type SeqColorizer ¶
type SeqColorizer struct { NucPalette map[byte]au.Color ProtPalette map[byte]au.Color QualPalette map[byte]au.Color QualBgPalette map[byte]au.Color Alphabet string }
SeqColorizer is a sequence colorizer object.
func NewSeqColorizer ¶
func NewSeqColorizer(alphabet string) *SeqColorizer
NewSeqColorizer returns a new sequence colorizer object.
func (*SeqColorizer) Color ¶
func (p *SeqColorizer) Color(seq []byte) []byte
Color adds ANSI colors to DNA/RNA or protein sequences.
func (*SeqColorizer) ColorAmino ¶
func (p *SeqColorizer) ColorAmino(seq []byte) []byte
ColorAmino adds ANSI colors to protein sequences.
func (*SeqColorizer) ColorNucleic ¶
func (p *SeqColorizer) ColorNucleic(seq []byte) []byte
ColorNucleic adds ANSI colors to DNA/RNA sequences.
func (*SeqColorizer) ColorNucleicWithQuals ¶
func (p *SeqColorizer) ColorNucleicWithQuals(seq []byte, quals []byte) []byte
ColorNucleicWithQuals adds ANSI colors to DNA/RNA sequences, using the quality palette as background.
func (*SeqColorizer) ColorQuals ¶
func (p *SeqColorizer) ColorQuals(quals []byte) []byte
ColorQuals adds grayscale colors to the quality values of DNA/RNA or protein sequences.
func (*SeqColorizer) ColorWithQuals ¶
func (p *SeqColorizer) ColorWithQuals(seq []byte, quals []byte) []byte
ColorWithQuals adds ANSI colors to DNA/RNA or protein sequences, using the quality palette as background.
func (*SeqColorizer) WrapWriter ¶
func (p *SeqColorizer) WrapWriter(fh *os.File) io.Writer
WrapWriter wraps a file into a go-colorable object if necessary.
type SeqDetector ¶
type SeqDetector struct { Queries Queries SearchAll bool Stranded bool NullMode string Cutoff float64 AlnParams *AlnParams }
SeqDetector holds parameters for sequence detection.
func NewSeqDetector ¶
func NewSeqDetector(searchAll bool, stranded bool, nullMode string, cutoff float64, alnParams *AlnParams) *SeqDetector
NewSeqDetector initializes a SeqDetector object.
func (*SeqDetector) AddAnonQueries ¶
func (d *SeqDetector) AddAnonQueries(qrs []string)
AddAnonQueries adds anonymous queries from a list of comma separated strings.
func (*SeqDetector) Detect ¶
func (d *SeqDetector) Detect(r *Reference, rec bool) []*AlignedSeq
Detect performs optionally recursive alignments of the queries against a given reference sequence.
func (*SeqDetector) LoadQueries ¶
func (d *SeqDetector) LoadQueries(fx string)
LoadQueries loads queries from a fasta file and calculates null scores for each.
type SeqStreamCtrl ¶
type SeqStreamCtrl int
const ( StreamTry SeqStreamCtrl = iota StreamQuit StreamEOF StreamExited )
type Toolshed ¶
func NewToolshed ¶
func NewToolshed() Toolshed
type WatchCtrlChan ¶
type WatchCtrlChan chan WatchCtrl
type WatchedFx ¶
type WatchedFx struct { Name string LastSize int64 LastTry time.Time BytesRead int64 IsDir bool SeqChan chan *simpleSeq CtrlChanIn chan SeqStreamCtrl CtrlChanOut chan SeqStreamCtrl }
type WatchedFxPool ¶
func (*WatchedFxPool) Delete ¶
func (m *WatchedFxPool) Delete(k string)
func (*WatchedFxPool) Get ¶
func (m *WatchedFxPool) Get(k string) *WatchedFx
func (*WatchedFxPool) Insert ¶
func (m *WatchedFxPool) Insert(k string, v *WatchedFx)
func (*WatchedFxPool) IsEmpty ¶
func (m *WatchedFxPool) IsEmpty() bool
func (*WatchedFxPool) Range ¶
func (m *WatchedFxPool) Range(f func(key, value interface{}) bool)
Source Files ¶
- amplicon.go
- bam.go
- bam_toolbox.go
- bed.go
- common.go
- concat.go
- convert.go
- dup.go
- fa2fq.go
- faidx.go
- fish.go
- fq2fa.go
- fx2tab.go
- genautocomplete.go
- grep.go
- head-genome.go
- head.go
- helper.go
- locate.go
- logging.go
- merge-slides.go
- mutate.go
- pair.go
- pid_alive_unix.go
- range.go
- rename.go
- replace.go
- restart.go
- rmdup.go
- root.go
- sample.go
- sana.go
- scat.go
- seq.go
- seq_detect.go
- shuffle.go
- sliding.go
- sort.go
- split.go
- split2.go
- stat.go
- subseq.go
- sum.go
- tab2fx.go
- translate.go
- util.go
- version.go
- watch.go