genomeGraph

package
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 1, 2024 License: BSD-3-Clause Imports: 22 Imported by: 0

Documentation

Overview

Package genomeGraph has structs and tools for reading, writing, editing and aligning graph representations of genomes.

Index

Constants

This section is empty.

Variables

View Source
var HumanChimpTwoScoreMatrix = [][]int64{
	{90, -330, -236, -356, -208},
	{-330, 100, -318, -236, -196},
	{-236, -318, 100, -330, -196},
	{-356, -236, -330, 90, -208},
	{-208, -196, -196, -208, -202},
}
View Source
var HumanChimpTwoScoreMatrixNoGap = [][]int64{
	{90, -330, -236, -356},
	{-330, 100, -318, -236},
	{-236, -318, 100, -330},
	{-356, -236, -330, 90},
}

Functions

func AddEdge

func AddEdge(u, v *Node, p float32)

AddEdge will append two edges, one forward and one backward, between any two given nodes. Provide a float32 probability to specify a weight for the edge, describing the more likely path through the graph.

func AddPath

func AddPath(allPaths []uint32, newPath uint32) []uint32

func AddSClip

func AddSClip(front int, lengthOfRead int, cig []cigar.Cigar) []cigar.Cigar

func AllAreEqual

func AllAreEqual(alpha []*Node, beta []*Node) bool

func AllAreEqualIgnoreOrder

func AllAreEqualIgnoreOrder(alpha []*Node, beta []*Node) bool

func BasesInGraph

func BasesInGraph(g *GenomeGraph) int

BasesInGraph will calculate the number of bases contained in GenomeGraph using dnaTwoBit.

func BlastSeed

func BlastSeed(seed *SeedDev, read fastq.Fastq, scoreMatrix [][]int64) int64

func BreakNonContiguousGraph

func BreakNonContiguousGraph(g []Node) [][]*Node

TODO: is it possible to order nodes while breaking discontiguous graphs? BreakNonContiguousGraph will return a slice of graphs ([]*Node) such that each graph in the slice is contiguous.

func CatPaths

func CatPaths(currPaths []uint32, newPaths []uint32) []uint32

func ChromAndPosToNumber

func ChromAndPosToNumber(chrom int, start int) uint64

func CompareBlastScore

func CompareBlastScore(a *SeedDev, b *SeedDev, read fastq.Fastq, scoreMatrix [][]int64) int

func CompareLenSeedDev

func CompareLenSeedDev(a *SeedDev, b *SeedDev) int

func CompareSeedDev

func CompareSeedDev(a *SeedDev, b *SeedDev) int

func GetSortOrder

func GetSortOrder(g *GenomeGraph) []uint32

GetSortOrder will perform a breadth first search (BFS) on a graph and return an output slice where output[sortedIdx] = originalIdx.

func GirafPairToSam

func GirafPairToSam(ag giraf.GirafPair) sam.MatePair

func GirafToSam

func GirafToSam(ag *giraf.Giraf) sam.Sam

func GraphSmithWatermanMemPool

func GraphSmithWatermanMemPool(gg *GenomeGraph, read fastq.FastqBig, seedHash map[uint64][]uint64, seedLen int, stepSize int, scoreMatrix [][]int64, m [][]int64, trace [][]rune, memoryPool **SeedDev) sam.Sam

func GraphSmithWatermanToGiraf

func GraphSmithWatermanToGiraf(gg *GenomeGraph, read fastq.FastqBig, seedHash map[uint64][]uint64, seedLen int, stepSize int, matrix *MatrixAln, scoreMatrix [][]int64, seedPool *sync.Pool, dnaPool *sync.Pool, sk scoreKeeper, dynamicScore dynamicScoreKeeper, seedBuildHelper *seedHelper) *giraf.Giraf

func IndexGenomeIntoMap

func IndexGenomeIntoMap(genome []Node, seedLen int, seedStep int) map[uint64][]uint64

func LeftAlignTraversal

func LeftAlignTraversal(n *Node, seq []dna.Base, refEnd int, currentPath []uint32, extension int, read []dna.Base, scores [][]int64, matrix *MatrixAln, sk scoreKeeper, dynamicScore dynamicScoreKeeper, pool *sync.Pool) ([]cigar.ByteCigar, int64, int, int, []uint32)

func LeftDynamicAln

func LeftDynamicAln(alpha []dna.Base, beta []dna.Base, scores [][]int64, matrix *MatrixAln, gapPen int64, dynamicScore dynamicScoreKeeper) (int64, []cigar.ByteCigar, int, int)

func LeftLocal

func LeftLocal(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, trace [][]rune) (int64, []cigar.Cigar, int, int, int, int)

func LocalView

func LocalView(samLine *sam.Sam, ref []*Node) string

func MatrixSetup

func MatrixSetup(size int) ([][]int64, [][]byte)

func MismatchStats

func MismatchStats(scoreMatrix [][]int64) (int64, int64, int64, int64)

func ModifySamToString

func ModifySamToString(aln sam.Sam, samflag bool, rname bool, pos bool, mapq bool, cig bool, rnext bool, pnext bool, tlen bool, seq bool, qual bool, extra bool) string

func NeedlemanWunsch

func NeedlemanWunsch(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, trace [][]rune) (int64, []cigar.Cigar)

func NewDnaPool

func NewDnaPool() sync.Pool

func NewMemSeedPool

func NewMemSeedPool() sync.Pool

func NodePosToReadPos

func NodePosToReadPos(graph *GenomeGraph, read *giraf.Giraf, node uint32, pos uint32) uint32

func PathToSeq

func PathToSeq(p giraf.Path, genome *GenomeGraph) []dna.Base

func PathToString

func PathToString(allPaths []uint32) string

func PrintGraph

func PrintGraph(gg *GenomeGraph)

PrintGraph will quickly print a GenomeGraph to standard output.

func RandGiraf

func RandGiraf(graph *GenomeGraph, numReads int, readLen int, randSeed int64) []*giraf.Giraf

func RandLocation

func RandLocation(genome *GenomeGraph) (uint32, uint32)

func RandLocationFast

func RandLocationFast(genome *GenomeGraph, totalBases int) (uint32, uint32)

func RandPathFwd

func RandPathFwd(genome *GenomeGraph, nodeIdx uint32, pos uint32, length int) ([]uint32, uint32, []dna.Base)

func RandSomaticMutations

func RandSomaticMutations(graph *GenomeGraph, reads []*giraf.Giraf, numSomaticSNV int, AlleleFrequency float64, randSeed int64) ([]uint32, []uint32)

TODO: simulate indels.

func RandomPairedReads

func RandomPairedReads(genome *GenomeGraph, readLength int, numReads int, numChanges int) []fastq.PairedEnd

func RandomReads

func RandomReads(genome *GenomeGraph, readLength int, numReads int, numChanges int) []fastq.Fastq

func ReversePath

func ReversePath(alpha []uint32)

func RightAlignTraversal

func RightAlignTraversal(n *Node, seq []dna.Base, start int, currentPath []uint32, extension int, read []dna.Base, scoreMatrix [][]int64, matrix *MatrixAln, sk scoreKeeper, dynamicScore dynamicScoreKeeper, pool *sync.Pool) ([]cigar.ByteCigar, int64, int, int, []uint32)

func RightDynamicAln

func RightDynamicAln(alpha []dna.Base, beta []dna.Base, scores [][]int64, matrix *MatrixAln, gapPen int64, dynamicScore dynamicScoreKeeper) (int64, []cigar.ByteCigar, int, int)

func RightLocal

func RightLocal(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, trace [][]rune) (int64, []cigar.Cigar, int, int, int, int)

func RoutineFqPairToGiraf

func RoutineFqPairToGiraf(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, scoreMatrix [][]int64, input <-chan fastq.PairedEndBig, output chan<- giraf.GirafPair, wg *sync.WaitGroup)

func RoutineFqToGiraf

func RoutineFqToGiraf(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, scoreMatrix [][]int64, inputChan <-chan fastq.FastqBig, outputChan chan<- giraf.Giraf, wg *sync.WaitGroup)

Goroutine worker functions.

func RoutineGirafToSam

func RoutineGirafToSam(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, scoreMatrix [][]int64, input <-chan fastq.PairedEndBig, output chan<- sam.Sam, wg *sync.WaitGroup)

func RoutineGirafToSamSingle

func RoutineGirafToSamSingle(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, scoreMatrix [][]int64, inputChan <-chan fastq.FastqBig, outputChan chan<- sam.Sam, wg *sync.WaitGroup)

func SetEvenWeights

func SetEvenWeights(u *Node)

SetEvenWeights will loop through a node's slice of edges and give every edge the same probability weight, divided evenly by the length of the slice.

func SimpleWriteGirafPair

func SimpleWriteGirafPair(filename string, input <-chan giraf.GirafPair, wg *sync.WaitGroup)

func SmithWaterman

func SmithWaterman(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, trace [][]rune) (int64, []cigar.Cigar, int64, int64, int64, int64)

func SoftClipBases

func SoftClipBases(front int, lengthOfRead int, cig []cigar.ByteCigar) []cigar.ByteCigar

func SortBlastz

func SortBlastz(seeds []*SeedDev, read fastq.Fastq, scoreMatrix [][]int64)

func SortById

func SortById(seqs []*Node)

func SortBySeq

func SortBySeq(seqs []*Node)

func SortSeedDevByLen

func SortSeedDevByLen(seeds []*SeedDev)

func SortSeedDevByTotalLen

func SortSeedDevByTotalLen(seeds []*SeedDev)

func SortSeedLen

func SortSeedLen(seeds []SeedDev)

func ViewGraphAlignment

func ViewGraphAlignment(g *giraf.Giraf, genome *GenomeGraph) string

func ViewMatrix

func ViewMatrix(m [][]int64) string

func WrapPairGiraf

func WrapPairGiraf(gg *GenomeGraph, fq fastq.PairedEndBig, seedHash map[uint64][]uint64, seedLen int, stepSize int, matrix *MatrixAln, scoreMatrix [][]int64, seedPool *sync.Pool, dnaPool *sync.Pool, sk scoreKeeper, dynamicScore dynamicScoreKeeper, seedBuildHelper *seedHelper) giraf.GirafPair

func Write

func Write(filename string, sg *GenomeGraph)

Write function will process GenomeGraph and write the data to a file.

func WriteToGraphHandle

func WriteToGraphHandle(file io.Writer, gg *GenomeGraph, lineLength int)

WriteToGraphHandle will help with any error handling when writing GenomeGraph to file.

Types

type Edge

type Edge struct {
	Dest *Node
	Prob float32
}

Edge describes the neighboring nodes and a weighted probability of the more likely path.

type GenomeGraph

type GenomeGraph struct {
	Nodes []Node
}

GenomeGraph struct contains a slice of Nodes.

func EmptyGraph

func EmptyGraph() *GenomeGraph

EmptyGraph will allocate a new zero pointer to a simple graph and will allocate memory for the Nodes of the graph.

func Read

func Read(filename string) *GenomeGraph

Read will process a simple graph formatted text file and parse the data into graph fields.

func SortGraph

func SortGraph(g *GenomeGraph) *GenomeGraph

SortGraph will reorder nodes in a graph such that the order and Ids of the output graph are topologically sorted.

func VariantGraph

func VariantGraph(ref <-chan fasta.Fasta, vcfMap map[string][]vcf.Vcf) *GenomeGraph

type MatrixAln

type MatrixAln struct {
	// contains filtered or unexported fields
}

func NewSwMatrix

func NewSwMatrix(size int) MatrixAln

type Node

type Node struct {
	Id        uint32
	ColumnId  uint32
	Seq       []dna.Base        // only this field or the SeqThreeBit will be kept
	SeqTwoBit *dnaTwoBit.TwoBit // this will change to a ThreeBit or be removed
	Prev      []Edge
	Next      []Edge

} // used to have Name (string) and Info (Annotation)

Node is uniquely defined by Id and is encoded with information describing sequence order and orientation and annotated variance.

func AddNode

func AddNode(g *GenomeGraph, n *Node) *Node

AddNode will add the values in n to the graph at the index of n.Id. A pointer to the new location of the node (inside the graph) is returned.

func FaSeqToNode

func FaSeqToNode(target fasta.Fasta, query fasta.Fasta, tStart int, qStart int, cigar align.Cigar, index int) (*Node, int, int)

FaSeqToNode is a general function used to create a new node based on a target fasta, a query fasta and a cigar operation. In addition, given two indices, it will update start/end for the subset of bases used to create the new Node. TODO: Add logic for correct node name annotation convention.

type ScoreMatrixHelper

type ScoreMatrixHelper struct {
	Matrix                         [][]int64
	MaxMatch                       int64
	MinMatch                       int64
	LeastSevereMismatch            int64
	LeastSevereMatchMismatchChange int64
}

type SeedDev

type SeedDev struct {
	TargetId    uint32
	TargetStart uint32
	QueryStart  uint32
	Length      uint32
	PosStrand   bool
	TotalLength uint32
	NextPart    *SeedDev
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL