Documentation ¶
Overview ¶
Package genomeGraph has structs and tools for reading, writing, editing, and aligning graph representations of genomes.
Index ¶
- Variables
- func AddEdge(u, v *Node, p float32)
- func AddPath(allPaths []uint32, newPath uint32) []uint32
- func AddSClip(front int, lengthOfRead int, cig []cigar.Cigar) []cigar.Cigar
- func AllAreEqual(alpha []*Node, beta []*Node) bool
- func AllAreEqualIgnoreOrder(alpha []*Node, beta []*Node) bool
- func BasesInGraph(g *GenomeGraph) int
- func BlastSeed(seed *SeedDev, read fastq.Fastq, scoreMatrix [][]int64) int64
- func BreakNonContiguousGraph(g []Node) [][]*Node
- func CatPaths(currPaths []uint32, newPaths []uint32) []uint32
- func ChromAndPosToNumber(chrom int, start int) uint64
- func CompareBlastScore(a *SeedDev, b *SeedDev, read fastq.Fastq, scoreMatrix [][]int64) int
- func CompareLenSeedDev(a *SeedDev, b *SeedDev) int
- func CompareSeedDev(a *SeedDev, b *SeedDev) int
- func GetSortOrder(g *GenomeGraph) []uint32
- func GirafPairToSam(ag giraf.GirafPair) sam.MatePair
- func GirafToSam(ag *giraf.Giraf) sam.Sam
- func GraphSmithWatermanMemPool(gg *GenomeGraph, read fastq.FastqBig, seedHash map[uint64][]uint64, ...) sam.Sam
- func GraphSmithWatermanToGiraf(gg *GenomeGraph, read fastq.FastqBig, seedHash map[uint64][]uint64, ...) *giraf.Giraf
- func IndexGenomeIntoMap(genome []Node, seedLen int, seedStep int) map[uint64][]uint64
- func LeftAlignTraversal(n *Node, seq []dna.Base, refEnd int, currentPath []uint32, extension int, ...) ([]cigar.ByteCigar, int64, int, int, []uint32)
- func LeftDynamicAln(alpha []dna.Base, beta []dna.Base, scores [][]int64, matrix *MatrixAln, ...) (int64, []cigar.ByteCigar, int, int)
- func LeftLocal(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, ...) (int64, []cigar.Cigar, int, int, int, int)
- func LocalView(samLine *sam.Sam, ref []*Node) string
- func MatrixSetup(size int) ([][]int64, [][]byte)
- func MismatchStats(scoreMatrix [][]int64) (int64, int64, int64, int64)
- func ModifySamToString(aln sam.Sam, samflag bool, rname bool, pos bool, mapq bool, cig bool, ...) string
- func NeedlemanWunsch(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, ...) (int64, []cigar.Cigar)
- func NewDnaPool() sync.Pool
- func NewMemSeedPool() sync.Pool
- func NodePosToReadPos(graph *GenomeGraph, read *giraf.Giraf, node uint32, pos uint32) uint32
- func PathToSeq(p giraf.Path, genome *GenomeGraph) []dna.Base
- func PathToString(allPaths []uint32) string
- func PrintGraph(gg *GenomeGraph)
- func RandGiraf(graph *GenomeGraph, numReads int, readLen int, randSeed int64) []*giraf.Giraf
- func RandLocation(genome *GenomeGraph) (uint32, uint32)
- func RandLocationFast(genome *GenomeGraph, totalBases int) (uint32, uint32)
- func RandPathFwd(genome *GenomeGraph, nodeIdx uint32, pos uint32, length int) ([]uint32, uint32, []dna.Base)
- func RandSomaticMutations(graph *GenomeGraph, reads []*giraf.Giraf, numSomaticSNV int, ...) ([]uint32, []uint32)
- func RandomPairedReads(genome *GenomeGraph, readLength int, numReads int, numChanges int) []fastq.PairedEnd
- func RandomReads(genome *GenomeGraph, readLength int, numReads int, numChanges int) []fastq.Fastq
- func ReversePath(alpha []uint32)
- func RightAlignTraversal(n *Node, seq []dna.Base, start int, currentPath []uint32, extension int, ...) ([]cigar.ByteCigar, int64, int, int, []uint32)
- func RightDynamicAln(alpha []dna.Base, beta []dna.Base, scores [][]int64, matrix *MatrixAln, ...) (int64, []cigar.ByteCigar, int, int)
- func RightLocal(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, ...) (int64, []cigar.Cigar, int, int, int, int)
- func RoutineFqPairToGiraf(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, ...)
- func RoutineFqToGiraf(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, ...)
- func RoutineGirafToSam(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, ...)
- func RoutineGirafToSamSingle(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, ...)
- func SetEvenWeights(u *Node)
- func SimpleWriteGirafPair(filename string, input <-chan giraf.GirafPair, wg *sync.WaitGroup)
- func SmithWaterman(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, ...) (int64, []cigar.Cigar, int64, int64, int64, int64)
- func SoftClipBases(front int, lengthOfRead int, cig []cigar.ByteCigar) []cigar.ByteCigar
- func SortBlastz(seeds []*SeedDev, read fastq.Fastq, scoreMatrix [][]int64)
- func SortById(seqs []*Node)
- func SortBySeq(seqs []*Node)
- func SortSeedDevByLen(seeds []*SeedDev)
- func SortSeedDevByTotalLen(seeds []*SeedDev)
- func SortSeedLen(seeds []SeedDev)
- func ViewGraphAlignment(g *giraf.Giraf, genome *GenomeGraph) string
- func ViewMatrix(m [][]int64) string
- func WrapPairGiraf(gg *GenomeGraph, fq fastq.PairedEndBig, seedHash map[uint64][]uint64, ...) giraf.GirafPair
- func Write(filename string, sg *GenomeGraph)
- func WriteToGraphHandle(file io.Writer, gg *GenomeGraph, lineLength int)
- type Edge
- type GenomeGraph
- type MatrixAln
- type Node
- type ScoreMatrixHelper
- type SeedDev
Constants ¶
This section is empty.
Variables ¶
var HumanChimpTwoScoreMatrix = [][]int64{
{90, -330, -236, -356, -208},
{-330, 100, -318, -236, -196},
{-236, -318, 100, -330, -196},
{-356, -236, -330, 90, -208},
{-208, -196, -196, -208, -202},
}
var HumanChimpTwoScoreMatrixNoGap = [][]int64{
{90, -330, -236, -356},
{-330, 100, -318, -236},
{-236, -318, 100, -330},
{-356, -236, -330, 90},
}
Functions ¶
func AddEdge ¶
AddEdge will append two edges, one forward and one backward, between any two given nodes. Provide a float32 probability to specify a weight for the edge, describing the more likely path through the graph.
func AllAreEqual ¶
func AllAreEqualIgnoreOrder ¶
func BasesInGraph ¶
func BasesInGraph(g *GenomeGraph) int
BasesInGraph will calculate the number of bases contained in GenomeGraph using dnaTwoBit.
func BreakNonContiguousGraph ¶
TODO: is it possible to order nodes while breaking discontiguous graphs? BreakNonContiguousGraph will return a slice of graphs ([]*Node) such that each graph in the slice is contiguous.
func ChromAndPosToNumber ¶
func CompareBlastScore ¶
func CompareLenSeedDev ¶
func CompareSeedDev ¶
func GetSortOrder ¶
func GetSortOrder(g *GenomeGraph) []uint32
GetSortOrder will perform a breadth first search (BFS) on a graph and return an output slice where output[sortedIdx] = originalIdx.
func IndexGenomeIntoMap ¶
func LeftAlignTraversal ¶
func LeftDynamicAln ¶
func MatrixSetup ¶
func ModifySamToString ¶
func NeedlemanWunsch ¶
func NewDnaPool ¶
func NewMemSeedPool ¶
func NodePosToReadPos ¶
func PathToString ¶
func PrintGraph ¶
func PrintGraph(gg *GenomeGraph)
PrintGraph will quickly print simpleGraph to standard out.
func RandLocation ¶
func RandLocation(genome *GenomeGraph) (uint32, uint32)
func RandLocationFast ¶
func RandLocationFast(genome *GenomeGraph, totalBases int) (uint32, uint32)
func RandPathFwd ¶
func RandSomaticMutations ¶
func RandSomaticMutations(graph *GenomeGraph, reads []*giraf.Giraf, numSomaticSNV int, AlleleFrequency float64, randSeed int64) ([]uint32, []uint32)
TODO: simulate indels.
func RandomPairedReads ¶
func RandomReads ¶
func ReversePath ¶
func ReversePath(alpha []uint32)
func RightAlignTraversal ¶
func RightDynamicAln ¶
func RightLocal ¶
func RoutineFqPairToGiraf ¶
func RoutineFqToGiraf ¶
func RoutineFqToGiraf(gg *GenomeGraph, seedHash map[uint64][]uint64, seedLen int, stepSize int, scoreMatrix [][]int64, inputChan <-chan fastq.FastqBig, outputChan chan<- giraf.Giraf, wg *sync.WaitGroup)
Goroutine worker functions.
func RoutineGirafToSam ¶
func RoutineGirafToSamSingle ¶
func SetEvenWeights ¶
func SetEvenWeights(u *Node)
SetEvenWeights will loop through a slice of edges and set the probability weight divided by the length of the slice.
func SimpleWriteGirafPair ¶
func SmithWaterman ¶
func SoftClipBases ¶
func SortSeedDevByLen ¶
func SortSeedDevByLen(seeds []*SeedDev)
func SortSeedDevByTotalLen ¶
func SortSeedDevByTotalLen(seeds []*SeedDev)
func SortSeedLen ¶
func SortSeedLen(seeds []SeedDev)
func ViewGraphAlignment ¶
func ViewGraphAlignment(g *giraf.Giraf, genome *GenomeGraph) string
func ViewMatrix ¶
func WrapPairGiraf ¶
func Write ¶
func Write(filename string, sg *GenomeGraph)
Write function will process GenomeGraph and write the data to a file.
func WriteToGraphHandle ¶
func WriteToGraphHandle(file io.Writer, gg *GenomeGraph, lineLength int)
WriteToGraphHandle will help with any error handling when writing GenomeGraph to file.
Types ¶
type Edge ¶
Edge describes the neighboring nodes and a weighted probability of the more likely path.
type GenomeGraph ¶
type GenomeGraph struct {
Nodes []Node
}
GenomeGraph struct contains a slice of Nodes.
func EmptyGraph ¶
func EmptyGraph() *GenomeGraph
EmptyGraph will allocate a new zero pointer to a simple graph and will allocate memory for the Nodes of the graph.
func Read ¶
func Read(filename string) *GenomeGraph
Read will process a simple-graph-formatted text file and parse the data into graph fields.
func SortGraph ¶
func SortGraph(g *GenomeGraph) *GenomeGraph
SortGraph will reorder nodes in a graph such that the order and Ids of the output graph are topologically sorted.
func VariantGraph ¶
type MatrixAln ¶
type MatrixAln struct {
// contains filtered or unexported fields
}
func NewSwMatrix ¶
type Node ¶
type Node struct {
Id uint32
ColumnId uint32
Seq []dna.Base // only this field or the SeqThreeBit will be kept
SeqTwoBit *dnaTwoBit.TwoBit // this will change to a ThreeBit or be removed
Prev []Edge
Next []Edge
} // used to have Name (string) and Info (Annotation)
Node is uniquely defined by Id and is encoded with information describing sequence order and orientation and annotated variance.
func AddNode ¶
func AddNode(g *GenomeGraph, n *Node) *Node
AddNode will add the values in n to the graph at the index of n.Id. A pointer to the new location of the node (inside the graph) is returned.
func FaSeqToNode ¶
func FaSeqToNode(target fasta.Fasta, query fasta.Fasta, tStart int, qStart int, cigar align.Cigar, index int) (*Node, int, int)
FaSeqToNode is a general function used to create a new node based on a target fasta, a query fasta, and a cigar operation. In addition, given two indices, it will update start/end for the subset of bases used to create the new Node. TODO: Add logic for correct node name annotation convention.