align

package
v0.0.0-...-a011eca Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 4, 2022 License: BSD-3-Clause, GPL-2.0, BSD-3-Clause, + 1 more Imports: 10 Imported by: 0

Documentation

Overview

Package align allows aligning Antha sequences using the biogo implementation of the Needleman-Wunsch and Smith-Waterman alignment algorithms.

Index

Constants

View Source
// GAP is the standard character used to represent a gap in an alignment.
const GAP rune = '-'

GAP defines a standard character representing an alignment gap

View Source
// OutputMatch is the character used to mark an alignment match.
const OutputMatch = "|"

OutputMatch defines a character representing an alignment match

View Source
// OutputMismatch is the character used to mark an alignment mismatch.
const OutputMismatch = " "

OutputMismatch defines a character representing an alignment mismatch

Variables

View Source
var (
	// Fitted is the linear gap penalty fitted Needleman-Wunsch aligner type.
	// The first row and first column (labelled "-") are the gap entries.
	//
	//	     Query letter
	//	     -   A   C   G   T
	//	-    0  -5  -5  -5  -5
	//	A   -5  10  -3  -1  -4
	//	C   -5  -3   9  -5   0
	//	G   -5  -1  -5   7  -3
	//	T   -5  -4   0  -3   8
	Fitted = align.Fitted{
		{0, -5, -5, -5, -5},
		{-5, 10, -3, -1, -4},
		{-5, -3, 9, -5, 0},
		{-5, -1, -5, 7, -3},
		{-5, -4, 0, -3, 8},
	}

	// FittedAffine is the affine gap penalty fitted Needleman-Wunsch aligner type.
	// The first row and first column (labelled "-") are the gap entries.
	//
	//	     Query letter
	//	     -   A   C   G   T
	//	-    0  -1  -1  -1  -1
	//	A   -1   1  -1  -1  -1
	//	C   -1  -1   1  -1  -1
	//	G   -1  -1  -1   1  -1
	//	T   -1  -1  -1  -1   1
	//
	// Gap open: -5
	FittedAffine = align.FittedAffine{
		Matrix: align.Linear{
			{0, -1, -1, -1, -1},
			{-1, 1, -1, -1, -1},
			{-1, -1, 1, -1, -1},
			{-1, -1, -1, 1, -1},
			{-1, -1, -1, -1, 1},
		},
		GapOpen: -5,
	}

	// NW is the linear gap penalty Needleman-Wunsch aligner type.
	// It uses the same matrix values as Fitted.
	//
	//	     Query letter
	//	     -   A   C   G   T
	//	-    0  -5  -5  -5  -5
	//	A   -5  10  -3  -1  -4
	//	C   -5  -3   9  -5   0
	//	G   -5  -1  -5   7  -3
	//	T   -5  -4   0  -3   8
	NW = align.NW{
		{0, -5, -5, -5, -5},
		{-5, 10, -3, -1, -4},
		{-5, -3, 9, -5, 0},
		{-5, -1, -5, 7, -3},
		{-5, -4, 0, -3, 8},
	}

	// NWAffine is the affine gap penalty Needleman-Wunsch aligner type.
	// It uses the same matrix values and gap open penalty as FittedAffine.
	//
	//	     Query letter
	//	     -   A   C   G   T
	//	-    0  -1  -1  -1  -1
	//	A   -1   1  -1  -1  -1
	//	C   -1  -1   1  -1  -1
	//	G   -1  -1  -1   1  -1
	//	T   -1  -1  -1  -1   1
	//
	// Gap open: -5
	NWAffine = align.NWAffine{
		Matrix: align.Linear{
			{0, -1, -1, -1, -1},
			{-1, 1, -1, -1, -1},
			{-1, -1, 1, -1, -1},
			{-1, -1, -1, 1, -1},
			{-1, -1, -1, -1, 1},
		},
		GapOpen: -5,
	}

	// SW1 is the Smith-Waterman aligner type. Matrix is a square scoring
	// matrix with the first column and first row specifying gap penalties.
	// Currently gap opening is not considered.
	//
	//	w(gap) = -1
	//	w(match) = +2
	//	w(mismatch) = -1
	SW1 = align.SW{
		{0, -1, -1, -1, -1},
		{-1, 2, -1, -1, -1},
		{-1, -1, 2, -1, -1},
		{-1, -1, -1, 2, -1},
		{-1, -1, -1, -1, 2},
	}

	// SW2 is the Smith-Waterman aligner type. Matrix is a square scoring
	// matrix with the first column and first row specifying gap penalties.
	// Currently gap opening is not considered. It differs from SW1 only in
	// that gaps are not penalised.
	//
	//	w(gap) = 0
	//	w(match) = +2
	//	w(mismatch) = -1
	SW2 = align.SW{
		{0, 0, 0, 0, 0},
		{0, 2, -1, -1, -1},
		{0, -1, 2, -1, -1},
		{0, -1, -1, 2, -1},
		{0, -1, -1, -1, 2},
	}

	// SWAffine is the affine gap penalty Smith-Waterman aligner type.
	// The first row and first column (labelled "-") are the gap entries.
	//
	//	     Query letter
	//	     -   A   C   G   T
	//	-    0  -1  -1  -1  -1
	//	A   -1   1  -1  -1  -1
	//	C   -1  -1   1  -1  -1
	//	G   -1  -1  -1   1  -1
	//	T   -1  -1  -1  -1   1
	//
	// Gap open: -5
	SWAffine = align.SWAffine{
		Matrix: align.Linear{
			{0, -1, -1, -1, -1},
			{-1, 1, -1, -1, -1},
			{-1, -1, 1, -1, -1},
			{-1, -1, -1, 1, -1},
			{-1, -1, -1, -1, 1},
		},
		GapOpen: -5,
	}
)
View Source
// Algorithms maps an algorithm name to the package-level ScoringMatrix
// value of the same name, so an aligner can be selected from a string.
var Algorithms = map[string]ScoringMatrix{
	"Fitted":       Fitted,
	"FittedAffine": FittedAffine,
	"NW":           NW,
	"NWAffine":     NWAffine,
	"SW1":          SW1,
	"SW2":          SW2,
	"SWAffine":     SWAffine,
}

Algorithms provides a map to look up ScoringMatrix algorithms by name. Algorithms available: Fitted: a modified Needleman-Wunsch algorithm which finds a local region of the reference with high similarity to the query; FittedAffine: a modified Needleman-Wunsch algorithm which finds a local region of the reference with high similarity to the query, using an affine gap penalty; NW: the Needleman-Wunsch algorithm; NWAffine: the affine gap penalty Needleman-Wunsch algorithm; SW1 and SW2: the Smith-Waterman algorithm; SWAffine: the affine gap penalty Smith-Waterman algorithm.

Functions

This section is empty.

Types

type Alignment

// Alignment stores the string result of an alignment of a query sequence
// against a template, together with the original RawAlignments.
type Alignment struct {
	// NOTE(review): presumably the aligned template string (with gaps) — confirm.
	TemplateResult    string
	// NOTE(review): presumably the aligned query string (with gaps) — confirm.
	QueryResult       string
	// Raw holds the original RawAlignments the result was built from.
	Raw               []RawAlignment
	// NOTE(review): indexing base and meaning of the position slices are not
	// shown here — confirm against the implementation.
	TemplatePositions []int
	QueryPositions    []int
	// NOTE(review): presumably the alignment score — confirm.
	Score             int
}

Alignment stores the string result of an alignment of a query sequence against a template. The original RawAlignments are also included.

func (*Alignment) Match

func (a *Alignment) Match() string

Match produces a formatted line indicating matches between aligned sequences

GCTTTTTTAT res1
|   |||||| <- like this
GGG-TTTTAT res2

func (*Alignment) QueryEnd

func (a *Alignment) QueryEnd() int

QueryEnd returns the end position of the alignment in the query

func (*Alignment) QueryFrame

func (a *Alignment) QueryFrame() int

QueryFrame returns -1 if the query is aligned in the reverse direction, 1 otherwise.

func (*Alignment) QueryStart

func (a *Alignment) QueryStart() int

QueryStart returns the start position of the alignment in the query

func (*Alignment) Split

func (a *Alignment) Split(maxSectionLength int) ([]Alignment, error)

Split splits an alignment into sections of up to a specified length, to help with formatting.

func (*Alignment) TemplateEnd

func (a *Alignment) TemplateEnd() int

TemplateEnd returns the end position of the alignment in the template

func (*Alignment) TemplateFrame

func (a *Alignment) TemplateFrame() int

TemplateFrame returns -1 if the template is aligned in the reverse direction, 1 otherwise.

func (*Alignment) TemplateStart

func (a *Alignment) TemplateStart() int

TemplateStart returns the start position of the alignment in the template

type Position

// Position contains the start, end and length of an alignment in a
// specified sequence.
// NOTE(review): whether Start/End are 0- or 1-based, and whether End is
// inclusive, is not shown here — confirm against the implementation.
type Position struct {
	Start  int
	End    int
	Length int
}

Position contains the start, end and length of an alignment in a specified sequence

type RawAlignment

// RawAlignment contains the positions aligned between the template and
// query sequences.
type RawAlignment struct {
	// TemplateAlignment is the aligned Position in the template sequence.
	TemplateAlignment Position
	// QueryAlignment is the aligned Position in the query sequence.
	QueryAlignment    Position
}

RawAlignment contains the positions aligned between the template and query sequences

type Result

// Result stores the full results of an alignment of a query against a
// template sequence, including the algorithm used.
type Result struct {
	// Template is the sequence the query was aligned against.
	Template  wtype.BioSequence
	// Query is the sequence that was aligned to the template.
	Query     wtype.BioSequence
	// Algorithm is the ScoringMatrix used to produce the alignment.
	Algorithm ScoringMatrix
	// Alignment holds the alignment output for this template/query pair.
	Alignment Alignment
}

Result stores the full results of an alignment of a query against a template sequence, including the algorithm used.

func DNA

func DNA(template, query wtype.DNASequence, alignmentMatrix ScoringMatrix) (alignment Result, err error)

DNA aligns two DNA sequences using a specified scoring algorithm. It returns an alignment description or an error if the scoring matrix is not square, or the sequence data types or alphabets do not match. Algorithms available are: Fitted: a modified Needleman-Wunsch algorithm which finds a local region of the reference with high similarity to the query; FittedAffine: a modified Needleman-Wunsch algorithm which finds a local region of the reference with high similarity to the query, using an affine gap penalty; NW: the Needleman-Wunsch algorithm; NWAffine: the affine gap penalty Needleman-Wunsch algorithm; SW1 and SW2: the Smith-Waterman algorithm; SWAffine: the affine gap penalty Smith-Waterman algorithm. Alignment of the reverse complement of the query sequence will also be attempted, and if the number of matches is higher the reverse alignment is returned. In the resulting alignment, mismatches are represented by lower case letters and gaps are represented by the GAP character "-".

func DNAFwd

func DNAFwd(template, query wtype.DNASequence, alignmentMatrix ScoringMatrix) (Result, error)

DNAFwd returns an alignment of a query sequence to a template sequence in the forward frame of the template, using a specified scoring algorithm

func DNARev

func DNARev(template, query wtype.DNASequence, alignmentMatrix ScoringMatrix) (alignment Result, err error)

DNARev returns the alignment of a query sequence to a template sequence in the reverse frame of the template, using a specified scoring algorithm

func DNASet

func DNASet(query wtype.DNASequence, templates []wtype.DNASequence, alignmentMatrix ScoringMatrix, maxResults int) ([]Result, error)

DNASet aligns a query to a collection (or database) of sequences, testing both forward and reverse directions. It returns the top scoring alignment results found in rank order, up to a specified number.

func (Result) Coverage

func (r Result) Coverage() float64

Coverage returns the percentage of matching nucleotides of the alignment to the template sequence. A value between 0 and 1 is returned: 1 = 100%; 0 = 0%.

func (Result) Gaps

func (r Result) Gaps() int

Gaps returns the number of gaps in the aligned query sequence result

func (Result) Identity

func (r Result) Identity() float64

Identity returns the percentage of matching nucleotides of the query in the template sequence. A value between 0 and 1 is returned: 1 = 100%; 0 = 0%.

func (Result) LongestContinuousSequence

func (r Result) LongestContinuousSequence() wtype.DNASequence

LongestContinuousSequence returns the longest unbroken chain of matches as a dna sequence

func (Result) Matches

func (r Result) Matches() int

Matches returns the number of matched nucleotides between the aligned query sequence and aligned template sequence.

func (Result) Mismatches

func (r Result) Mismatches() int

Mismatches returns the number of mismatched nucleotides between the aligned query sequence and aligned template sequence.

func (Result) Positions

func (r Result) Positions() (result sequences.SearchResult)

Positions returns a SearchResult detailing the positions in the template sequence of the longest continuous matching sequence from the alignment.

func (Result) Score

func (r Result) Score() int

Score returns the alignment score

func (Result) String

func (r Result) String() string

String prints alignment result in form of two aligned sequence strings printed on parallel lines.

type ScoringMatrix

// ScoringMatrix is the aligner interface accepted by this package's
// alignment functions. It matches the align.Aligner interface of the
// biogo/align package.
type ScoringMatrix interface {
	// Align aligns the sequence data of reference and query, returning an
	// ordered slice of features describing matching and mismatching
	// segments, or an error.
	Align(reference, query align.AlphabetSlicer) ([]feat.Pair, error)
}

ScoringMatrix implements the align.Aligner interface of the biogo/align package. An align.Aligner aligns the sequence data of two type-matching Slicers, returning an ordered slice of features describing matching and mismatching segments. The sequences to be aligned must have a valid gap letter in the first position of their alphabet; the alphabets {DNA,RNA}{gapped,redundant} and Protein provided by the biogo/alphabet package satisfy this.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL