util

package
v0.1.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 5, 2026 License: MIT Imports: 19 Imported by: 0

Documentation

Index

Constants

View Source
const (
	STOP = 0
	DIAG = 1 // Diagonal move (match/mismatch)
	UP   = 2 // Up move (gap in sequence B)
	LEFT = 3 // Left move (gap in sequence A)
)
View Source
const BAI1_int = 0x01494142 // 'BAI\1' in Intel CPUs
View Source
const CODE_STAT_MOUSE_EVENT = 1

Variables

This section is empty.

Functions

func AbsInt

func AbsInt(i int) int

func AffineGlobalAlignment

func AffineGlobalAlignment(query, reference string, params ScoringParams) (cigar string, score int)

AffineGlobalAlignment performs global alignment (End-to-End) so no base is left unmapped.

func AsciiColorStr

func AsciiColorStr(clr int) string

func CalcOverlap

func CalcOverlap(fe GeneFeature, sec MappedSection) int

func ChoiceInt

func ChoiceInt(ints []int) int

func ChoiceStr

func ChoiceStr(strs []string) string

func ChoiceStrExcept

func ChoiceStrExcept(strs []string, exc string) string

func ChroPos2Linear

func ChroPos2Linear(chro string, pos int) (rpos int64, err error)

func Cigar14To13

func Cigar14To13(cigar string) string

func CigarChroLen

func CigarChroLen(opts []CigarOpt) int

func CigarIndelLen

func CigarIndelLen(opts []CigarOpt) int

func CigarMappedLen

func CigarMappedLen(opts []CigarOpt, incI bool) int

func CigarReadLen

func CigarReadLen(opts []CigarOpt) int

func CompressCigar

func CompressCigar(cigar string) string

func ContainStr

func ContainStr(sli []string, target string) bool

func CropCigarOnChro

func CropCigarOnChro(cigar string, first_base_loc, wanted_first_chro_base, wanted_last_chro_base int, add_S_before_after bool) (ret string, new_mapped_loc_after_S, skipped_rbases int)

func CropCigarOnRead

func CropCigarOnRead(cigar string, chro_pos_after_S, wanted_first_read_base_0based, wanted_last_read_base_0based int, add_S_before_after bool) (ret string, new_first_base_chro_loc, skipped_chrobases int)

func DelTempFile

func DelTempFile(fn string)

func ExIntTag

func ExIntTag(extra_columns []SAMAppendix, key string) int

func ExecStream

func ExecStream(cmdstr string) (return_code int, stdout io.ReadCloser)

func ExtraTagInt

func ExtraTagInt(extra []SAMAppendix, tagname string) (found bool, tagvalue int)

func FixSoftClipping

func FixSoftClipping(pos int, cigar string) (newpos int, newcigar string)

func GetBlockInfo

func GetBlockInfo(voff uint64) (bam_offset uint64, inblock_offset uint32)

func GetIntTag

func GetIntTag(fl, tag string) int

func GlobalAlignmentCIGAR

func GlobalAlignmentCIGAR(query_seq, reference_seq string, matchScore, mismatchPenalty, gapOpenPenalty, gapExtendPenalty int) (string, int)

Scores and Penalties are all positive.

func HammingDistance

func HammingDistance(a, b string) (dist int, err error)

func HashCode

func HashCode(str string) uint

func IfElseInt

func IfElseInt(con bool, i1, i2 int) int

func IfElseStr

func IfElseStr(con bool, i1, i2 string) string

func IntersectInt

func IntersectInt(s1, s2 []int) []int

func IntersectStr

func IntersectStr(s1, s2 []string) []string

func IsFile

func IsFile(fname string) bool

func IsFusionRead

func IsFusionRead(sections []MappedSection, max_intron int) bool

func IsOverlapping

func IsOverlapping(As, Ae, Bs, Be, min_hanging_len int) bool

A and B should have at least min_hanging_len bases overlapping

func IsWellContaining

func IsWellContaining(As, Ae, Bs, Be, min_hanging_len int) bool

A should contain B; A should be much longer than B

func JoinInt

func JoinInt(split string, ints []int) string

func JoinStr

func JoinStr(split string, strs []string) string

func ListDir

func ListDir(path string) (files []string, err error)

func LoadJuncsInTranscripts

func LoadJuncsInTranscripts(GTF_fname string, gtf_gene_id, gtf_transcript_id string) (lookup_table map[string]int, err error)

lookup_table: the key is chro:pos1 <TAB> pos2, where pos1 and pos2 are both 1-based coordinates, both included in the exons, and pos1 < pos2. The value is 1 if positive and 2 if negative, combined with bitwise OR.

func LoadTab

func LoadTab(fn string, oldmap *map[string]string) map[string]string

func Map2Slice

func Map2Slice(m map[string]bool) []string

func MatchPrefix

func MatchPrefix(onestr string, prefix_pool []string) (prefix string, found bool)

func MaxFloat64

func MaxFloat64(i, j float64) float64

func MaxInt

func MaxInt(i, j int) int

func MaxStr

func MaxStr(a, b string) string

func MergedGaps

func MergedGaps(gaps [][2]int) [][2]int

func MinInt

func MinInt(i, j int) int

func MinStr

func MinStr(a, b string) string

func Mv

func Mv(o, n string)

func OverlapGaps

func OverlapGaps(gapA [][2]int, gapB [][2]int) [][2]int

Gaps are half-open intervals: the closed gap [1, 99] is written as [1, 100) in the input. The output also uses half-open gaps. gapA and gapB MUST be sorted by coordinates and non-overlapping.

func Phred64_to_33

func Phred64_to_33(s string) string

func Random4Bases

func Random4Bases(length int) string

func RandomBases

func RandomBases(length int) string

func RandomString

func RandomString(length int) string

func ReadFastA

func ReadFastA(fname string) (seqlist []string, seqmap map[string]string, err error)

func ReadOnerowFastA

func ReadOnerowFastA(fname string) (rname, seq string, err error)

func ReadUInt16LittleEndianFp

func ReadUInt16LittleEndianFp(fp *os.File) (ret uint16, err error)

func ReadUInt32LittleEndianFp

func ReadUInt32LittleEndianFp(fp *os.File) (ret uint32, err error)

func ReadUInt64LittleEndianFp

func ReadUInt64LittleEndianFp(fp *os.File) (ret uint64, err error)

func ReverseRead

func ReverseRead(bases, qual string) (rbases, rqual string)

func SelectQualString

func SelectQualString(rlen int, reflib string) (rqual string, floats []float64)

func SequenceErrorByQualityString

func SequenceErrorByQualityString(read, qstr string, error_scalling float64) (newread string, seq_error []bool)

func SequenceErrorByRandomQualityString

func SequenceErrorByRandomQualityString(read string, error_scalling float64) (newread, qual string, seq_error []bool)

func SequenceErrorByRandomQualityStringEx

func SequenceErrorByRandomQualityStringEx(read string, error_scalling float64, refqual string) (newread, qual string, seq_error []bool)

func SetTtyCanonical

func SetTtyCanonical(is_canon, has_echo bool)

func System

func System(cmd string) (return_code int, stdout string)

func TempFileName

func TempFileName() string

func TestTty

func TestTty()

func ULID

func ULID() string

func UUID

func UUID() string

func UXID

func UXID() string

func UnionInt

func UnionInt(s1, s2 []int) []int

func UnionStr

func UnionStr(s1, s2 []string) []string

func UniqueFeatureLength

func UniqueFeatureLength(mas []FeatureReadMatching) int

func UniqueInt

func UniqueInt(strs []int) []int

func UniqueStr

func UniqueStr(strs []string) []string

func WriteFastA

func WriteFastA(fp io.Writer, name, seq string)

func WriteFastAEx

func WriteFastAEx(fp io.Writer, name, seq string, fasta_line_width int)

Types

type AlignmentResult

type AlignmentResult struct {
	Score     int
	SequenceA string
	SequenceB string
	StartA    int // Start index in the original Sequence A
	StartB    int // Start index in the original Sequence B
}

AlignmentResult holds the results of the Smith-Waterman alignment.

func SmithWaterman

func SmithWaterman(seqA, seqB string, matchScore, mismatchScore, gapCreationPenalty, gapExtensionPenalty int) AlignmentResult

SmithWaterman performs local sequence alignment using a traceback matrix for correctness.

func SmithWaterman3P

func SmithWaterman3P(seqA, seqB string, matchScore, mismatchScore, gapPenalty int) AlignmentResult

SmithWaterman3P performs local sequence alignment using a traceback matrix for correctness.

type AsGap

type AsGap [][2]int

func (AsGap) Len

func (a AsGap) Len() int

func (AsGap) Less

func (a AsGap) Less(i, j int) bool

func (AsGap) Swap

func (a AsGap) Swap(i, j int)

type BamReader

type BamReader struct {
	// contains filtered or unexported fields
}

func BAMopen

func BAMopen(fname string) (br *BamReader, err error)

func (*BamReader) Array

func (qf *BamReader) Array() (strarr []string, err error)

func (*BamReader) BamHeader

func (qf *BamReader) BamHeader() string

func (*BamReader) Close

func (br *BamReader) Close()

func (*BamReader) GetChromosomeInfo

func (qf *BamReader) GetChromosomeInfo(chro_name string) *ChromosomeInfo

func (*BamReader) Line

func (br *BamReader) Line() (st string, err error)

func (*BamReader) ReloadLine

func (br *BamReader) ReloadLine()

func (*BamReader) SAMRecord

func (qf *BamReader) SAMRecord() (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, err error)

func (*BamReader) SAMRecordEx

func (qf *BamReader) SAMRecordEx() (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, extra_columns []SAMAppendix, err error)

type BucketedTable

type BucketedTable struct {
	// contains filtered or unexported fields
}

func CreateBucketedTable

func CreateBucketedTable(granularity int) *BucketedTable

granularity must be greater than the longest read length.

func (*BucketedTable) Append

func (who *BucketedTable) Append(chroname string, pos int, value string)

func (*BucketedTable) Lookup

func (who *BucketedTable) Lookup(chroname string, read_start_pos, read_len int) (ret []string, found bool)

This will look up all events in the read, given that the read starts at read_start_pos and is shorter than granularity.

type ChromosomeInfo

type ChromosomeInfo struct {
	Name   string
	Length int
}

type CigarOpt

type CigarOpt struct {
	OptType byte
	OptLen  int
}

func ParseCigar

func ParseCigar(cigar string) []CigarOpt

func ParseCigar14

func ParseCigar14(cigar string) []CigarOpt

type Exon

type Exon struct {
	Chro        string
	Start, Stop int //Both are 1-base coordinates. Both inclusive in this exon. The same as in GTF format.
	IsNegative  bool
}

type FeatureIndex

type FeatureIndex struct {
	RawFeatures []*GeneFeature
	// contains filtered or unexported fields
}

func CreateFeatureIndex

func CreateFeatureIndex() *FeatureIndex

func LoadFeatureTable

func LoadFeatureTable(fname string, include_intron bool) (FeatureIndex *FeatureIndex, err error)

func (*FeatureIndex) Append

func (self *FeatureIndex) Append(chro string, start, end int, feat *GeneFeature)

func (*FeatureIndex) Features

func (self *FeatureIndex) Features(sec MappedSection) []*GeneFeature

func (*FeatureIndex) GetGeneCount

func (self *FeatureIndex) GetGeneCount() int

func (*FeatureIndex) IsGeneNeighbours

func (self *FeatureIndex) IsGeneNeighbours(id1, id2 string) bool

func (*FeatureIndex) IsNeighbourFeatures

func (self *FeatureIndex) IsNeighbourFeatures(f1, f2 GeneFeature) bool

func (*FeatureIndex) Lookup

func (self *FeatureIndex) Lookup(chro string, start, stop int) []*GeneFeature

type FeatureReadMatching

type FeatureReadMatching struct {
	Feature                                 GeneFeature
	ChroStart, ChroStop, ReadStart, ReadLen int // ChroStop is inclusive (len = Stop - Start + 1)
}

type FusionEvent

type FusionEvent struct {
	Chro1, Chro2                          string
	Pos1, Pos2                            int
	Cross, ExtendToSmall1, ExtendToSmall2 bool
	ExtensionLength1, ExtensionLength2    int
	EventType                             string
	NSup                                  int
}

func ExtractFusionEvents

func ExtractFusionEvents(sections []MappedSection, read_on_negative bool) []FusionEvent

func LoadFusions

func LoadFusions(ff string) (fusions []FusionEvent, err error)

func LoadReportedJunctions

func LoadReportedJunctions(JuncOut_file string) (junctions []FusionEvent, err error)

func (FusionEvent) Key

func (self FusionEvent) Key() string

type GeneFeature

type GeneFeature struct {
	Chro             string
	Start, Stop      int // Stop is inclusive (len = Stop - Start + 1)
	GeneID           string
	IsNegativeStrand bool
	ExonID           int
	FeatureIndex     int
}

func LoadFeatures

func LoadFeatures(fname string, include_intron bool) (features []*GeneFeature, neighbour_gene_table map[string]bool, err error)

func LoadFeaturesEx

func LoadFeaturesEx(fname string, include_intron bool, gtf_gene_id string) (features []*GeneFeature, neighbour_gene_table map[string]bool, err error)

func MergeSortFeatures

func MergeSortFeatures(gfs []*GeneFeature) []*GeneFeature

func (*GeneFeature) Key

func (self *GeneFeature) Key() string

type MappedSection

type MappedSection struct {
	Chro                string
	Pos                 int
	Cigar               string
	ReadLength          int
	IsNegative          bool
	IsMainAlignment     bool
	IsCoordinateGoingUp bool

	ConnectToLeft  bool
	ConnectToRight bool

	ReadPosition     int
	ChromosomeLength int
}

func ParseCigarSections

func ParseCigarSections(chro string, pos int, cigar string, flags int) []MappedSection

func ParseFusionSections

func ParseFusionSections(chro string, pos int, cigar string, flags int, appendix []SAMAppendix) []MappedSection

type MappingCoverage

type MappingCoverage struct {
	// contains filtered or unexported fields
}

func LoadCoverage

func LoadCoverage(prefix string) (mc *MappingCoverage, err error)

func (*MappingCoverage) Coverage

func (mc *MappingCoverage) Coverage(chro string, pos int) (coverage int, err error)

func (*MappingCoverage) MeanCoverage

func (mc *MappingCoverage) MeanCoverage(chro string, pos, bases int) (meancoverage int, err error)

type MatrixCell

type MatrixCell struct {
	// contains filtered or unexported fields
}

MatrixCell holds scores for the three affine states.

type MyError

type MyError struct {
	// contains filtered or unexported fields
}

func NewError

func NewError(msg string) *MyError

func (*MyError) Error

func (self *MyError) Error() string

type Qfile

type Qfile struct {
	// contains filtered or unexported fields
}

func Qopen

func Qopen(filename string) (qfp *Qfile, err error)

func QopenGz

func QopenGz(gzfile string) (qfp *Qfile, err error)

func Qstream

func Qstream(raw_reader *io.ReadCloser) (qfp *Qfile, err error)

func (*Qfile) Array

func (qf *Qfile) Array() (strarr []string, err error)

func (*Qfile) Close

func (qf *Qfile) Close() (err error)

func (*Qfile) GetChromosomeInfo

func (qf *Qfile) GetChromosomeInfo(chro_name string) *ChromosomeInfo

func (*Qfile) Line

func (qf *Qfile) Line() (str string, err error)

func (*Qfile) ReloadLine

func (qf *Qfile) ReloadLine()

func (*Qfile) Rewind

func (qf *Qfile) Rewind()

func (*Qfile) SAMRecord

func (qf *Qfile) SAMRecord() (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, err error)

func (*Qfile) SAMRecordEx

func (qf *Qfile) SAMRecordEx() (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, extra_columns []SAMAppendix, err error)

func (*Qfile) SetFileInfo

func (qf *Qfile) SetFileInfo(info string)

func (*Qfile) Split

func (qf *Qfile) Split(sep string) (strarr []string, err error)

func (*Qfile) TrimLine

func (qf *Qfile) TrimLine() (str string, err error)

type RawBAIChromosome

type RawBAIChromosome struct {
	BinNumbers   []int
	ChunksInBins [][][2]uint64
}

func GetAllChromosomesBAI

func GetAllChromosomesBAI(fname string) (chroinfo []RawBAIChromosome, window16offsets [][]uint64, err error)

type RawBamChroInfo

type RawBamChroInfo struct {
	ChroName   string
	ChroLength int32
}

type RawBamFile

type RawBamFile struct {
	Chro_Info   []RawBamChroInfo
	Read_Groups []ReadGroupInfo
	// contains filtered or unexported fields
}

func RawBamOpen

func RawBamOpen(fn string) (fp *RawBamFile, err error)

func (*RawBamFile) Close

func (rb *RawBamFile) Close()

func (*RawBamFile) FindAllStringTags

func (rb *RawBamFile) FindAllStringTags(b []byte, tagname string) []string

func (*RawBamFile) NextReadBin

func (rb *RawBamFile) NextReadBin() (ret []byte, vfile_pos_rstart int64, err error)

func (*RawBamFile) Read

func (rb *RawBamFile) Read(data []byte) (rlen int, err error)

func (*RawBamFile) ReadBinBasicInfo

func (rb *RawBamFile) ReadBinBasicInfo(b []byte) (readname string, flag int32, chro string, pos, cigar_opts, read_len int32, extra_data []byte)

type ReadGroupInfo

type ReadGroupInfo struct {
	GroupName string
}

type SAMAppendix

type SAMAppendix struct {
	TagName  string
	TagType  byte
	TagValue string
}

func SAMRecordParser

func SAMRecordParser(fli []string) (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, extra_columns []SAMAppendix, err error)

type SamBamReader

type SamBamReader interface {
	SAMRecordEx() (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, extra_columns []SAMAppendix, err error)
	SAMRecord() (read_name string, flags int, chro string, pos, mapq int, cigar, mate_chro string, mate_pos, tlen int, seq, qual string, err error)
	GetChromosomeInfo(chro_name string) *ChromosomeInfo
}

type SamBuffered

type SamBuffered struct {
	// contains filtered or unexported fields
}

func CreateBufferedSam

func CreateBufferedSam(qs *Qfile) *SamBuffered

func (*SamBuffered) GetNextPair

func (sb *SamBuffered) GetNextPair()

func (*SamBuffered) HasThisName

func (sb *SamBuffered) HasThisName(rname string) (hasThis bool, chr1, chr2 string, flag1, flag2, pos1, pos2 int, cigar1, cigar2 string, ex1, ex2 []SAMAppendix, isEOF bool)

type ScoringParams

type ScoringParams struct {
	MatchScore       int
	MismatchPenalty  int
	GapCreatePenalty int // Cost to open a new gap
	GapExtPenalty    int // Cost to extend an existing gap
}

type SmithParams

type SmithParams struct {
	MatchScore       int
	MismatchPenalty  int
	GapOpenPenalty   int
	GapExtendPenalty int
}

SmithParams holds the penalties and scores. Note: all values are positive.

type SmithResult

type SmithResult struct {
	CIGAR    string
	RefStart int // 0-based index start in Reference
	RefEnd   int // 0-based index end in Reference (exclusive)
	Score    int
	QuerySeq string
	RefSeq   string
}

SmithResult holds the output of the algorithm.

func SmithWatermanSemiGlobal

func SmithWatermanSemiGlobal(query, ref string, params SmithParams) SmithResult

SmithWatermanSemiGlobal performs alignment where the entire query must be mapped, but it can map to any substring of the reference.

type SortedFeatureReadMatching

type SortedFeatureReadMatching []FeatureReadMatching

func (SortedFeatureReadMatching) Len

func (mas SortedFeatureReadMatching) Len() int

func (SortedFeatureReadMatching) Less

func (mas SortedFeatureReadMatching) Less(i, j int) bool

func (SortedFeatureReadMatching) Swap

func (mas SortedFeatureReadMatching) Swap(i, j int)

type SortedTable

type SortedTable struct {
	SortedEntries []int
}

func CreateSortedTable

func CreateSortedTable() *SortedTable

func (*SortedTable) Append

func (me *SortedTable) Append(pos int)

func (*SortedTable) LessOrEqual

func (me *SortedTable) LessOrEqual(pos int) int

func (*SortedTable) PrintEntries

func (me *SortedTable) PrintEntries()

func (*SortedTable) Range

func (me *SortedTable) Range(pos_start, pos_stop int) []int

func (*SortedTable) Sort

func (me *SortedTable) Sort()

type SortingFeature

type SortingFeature []*GeneFeature

func (SortingFeature) Len

func (s SortingFeature) Len() int

func (SortingFeature) Less

func (s SortingFeature) Less(i, j int) bool

func (SortingFeature) Swap

func (s SortingFeature) Swap(i, j int)

type Transcript

type Transcript struct {
	GeneID, TranscriptID string
	Exons                []Exon
}

func LoadTranscripts

func LoadTranscripts(GTF_fname string, gtf_gene_id, gtf_transcript_id string) (txns []Transcript, err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL