seq

package
v1.0.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 29, 2021 License: BSD-3-Clause Imports: 3 Imported by: 150

Documentation

Overview

Package seq provides the base for storage and manipulation of biological sequence information.

A variety of sequence types are provided by derived packages including linear and protein sequence with and without quality scores. Multiple sequence data is also supported as unaligned sets and aligned sequences.

Quality scoring is based on Phred scores, although there is the capacity to interconvert between Phred and Solexa scores and a Solexa quality package is provided, though not integrated.

Index

Constants

View Source
// Start and End are distinct single-bit flag values: Start is 1 (1 << 0) and
// End is 2 (1 << 1), so the two can be combined with a bitwise OR.
// NOTE(review): the functions that consume these flags are not visible here;
// presumably they select one or both ends of a sequence — confirm with callers.
const (
	Start = 1 << iota
	End
)

Variables

View Source
var (
	// DefaultQphred is the default value for Qphred scores from non-quality sequences.
	DefaultQphred alphabet.Qphred = 40
	// DefaultEncoding is the default encoding for Qphred scores from non-quality sequences.
	DefaultEncoding alphabet.Encoding = alphabet.Sanger
)
View Source
// FloatTolerance is the tolerance on float comparison for DefaultQConsensus:
// normalised letter weights that differ from the highest weight by less than
// this amount are treated as tied with it.
var FloatTolerance float64 = 1e-10

Tolerance on float comparison for DefaultQConsensus.

Functions

This section is empty.

Types

type Aligned

// Aligned describes the interface for aligned multiple sequences.
type Aligned interface {
	Start() int
	End() int
	// NOTE(review): presumably the number of sequences (rows) in the
	// alignment — confirm against implementations.
	Rows() int
	// Column returns the letters of the alignment column at pos.
	// NOTE(review): the effect of fill is not visible here — confirm against
	// implementations.
	Column(pos int, fill bool) []alphabet.Letter
	// ColumnQL is the counterpart of Column that returns quality letters.
	ColumnQL(pos int, fill bool) []alphabet.QLetter
}

Aligned describes the interface for aligned multiple sequences.

type AlignedAppender

// An AlignedAppender is a multiple sequence alignment that can append letters.
// It embeds Aligned and adds two append operations, each of which reports
// failure through its error result.
type AlignedAppender interface {
	Aligned
	// NOTE(review): presumably each argument is one alignment column to
	// append — confirm against implementations.
	AppendColumns(a ...[]alphabet.QLetter) (err error)
	// NOTE(review): presumably a[i] is appended to row i — confirm against
	// implementations.
	AppendEach(a [][]alphabet.QLetter) (err error)
}

An AlignedAppender is a multiple sequence alignment that can append letters.

type Alphabeter

// An Alphabeter can report the alphabet.Alphabet it uses.
type Alphabeter interface {
	Alphabet() alphabet.Alphabet
}

type Annotation

// An Annotation is a basic linear sequence annotation type.
type Annotation struct {
	// ID is the identifier of the sequence; it is what Name returns and
	// SetName sets.
	ID      string
	// Desc is the description of the sequence; it is what Description returns
	// and SetDescription sets.
	Desc    string
	// Loc is the location of the sequence; it is what Location returns and
	// SetLocation sets.
	Loc     feat.Feature
	// Strand is the strand of the sequence, exposed as a feat.Orientation by
	// Orientation and SetOrientation.
	Strand  Strand
	// Conform is the sequence conformation.
	Conform feat.Conformation
	// Alpha is the alphabet.Alphabet used by the sequence.
	Alpha   alphabet.Alphabet
	// Offset is the global offset of the sequence, as set by SetOffset.
	Offset  int
}

An Annotation is a basic linear sequence annotation type.

func (*Annotation) Alphabet

func (a *Annotation) Alphabet() alphabet.Alphabet

Alphabet returns the alphabet.Alphabet used by the sequence.

func (*Annotation) CloneAnnotation

func (a *Annotation) CloneAnnotation() *Annotation

CloneAnnotation returns a pointer to a copy of the receiver.

func (*Annotation) Conformation

func (a *Annotation) Conformation() feat.Conformation

Conformation returns the sequence conformation.

func (*Annotation) Description

func (a *Annotation) Description() string

Description returns the Desc string of the sequence.

func (*Annotation) Location

func (a *Annotation) Location() feat.Feature

Location returns the Loc field of the sequence.

func (*Annotation) Moltype

func (a *Annotation) Moltype() feat.Moltype

Moltype returns the molecule type of the sequence.

func (*Annotation) Name

func (a *Annotation) Name() string

Name returns the ID string of the sequence.

func (*Annotation) Orientation

func (a *Annotation) Orientation() feat.Orientation

Orientation returns the sequence's strand as a feat.Orientation.

func (*Annotation) SetAlphabet

func (a *Annotation) SetAlphabet(n alphabet.Alphabet) error

SetAlphabet sets the alphabet.Alphabet used by the sequence.

func (*Annotation) SetConformation

func (a *Annotation) SetConformation(c feat.Conformation) error

SetConformation sets the sequence conformation.

func (*Annotation) SetDescription

func (a *Annotation) SetDescription(d string) error

SetDescription sets the Desc string of the sequence.

func (*Annotation) SetLocation

func (a *Annotation) SetLocation(f feat.Feature) error

SetLocation sets the Loc field of the sequence.

func (*Annotation) SetName

func (a *Annotation) SetName(id string) error

SetName sets the ID string of the sequence.

func (*Annotation) SetOffset

func (a *Annotation) SetOffset(o int) error

SetOffset sets the global offset of the sequence to o.

func (*Annotation) SetOrientation

func (a *Annotation) SetOrientation(o feat.Orientation) error

SetOrientation sets the sequence's strand from a feat.Orientation.

type Appender

// An Appender can append letters, given either as plain letters or as
// quality letters. Both methods report failure through their error result.
type Appender interface {
	AppendLetters(...alphabet.Letter) error
	AppendQLetters(...alphabet.QLetter) error
}

An Appender can append letters.

type ConformationSetter

// A ConformationSetter can set its sequence conformation.
type ConformationSetter interface {
	SetConformation(feat.Conformation) error
}

A ConformationSetter can set its sequence conformation.

type Conformationer

// A Conformationer can give information regarding the sequence's conformation.
// For the purposes of sequtils, types that are not a Conformationer are
// treated as linear.
type Conformationer interface {
	Conformation() feat.Conformation
}

A Conformationer can give information regarding the sequence's conformation. For the purposes of sequtils, types that are not a Conformationer are treated as linear.

type ConsenseFunc

// ConsenseFunc is a function type that returns the consensus letter for a
// column of an alignment. a is the alignment, alpha the alphabet used to
// validate and index the column's letters, and pos and fill are the arguments
// handed to the alignment's column accessor. The result carries both the
// consensus letter and a quality score for it.
type ConsenseFunc func(a Aligned, alpha alphabet.Alphabet, pos int, fill bool) alphabet.QLetter

ConsenseFunc is a function type that returns the consensus letter for a column of an alignment.

var (
	// The default ConsenseFunc function.
	DefaultConsensus ConsenseFunc = func(a Aligned, alpha alphabet.Alphabet, pos int, fill bool) alphabet.QLetter {
		w := make([]int, alpha.Len())
		c := a.Column(pos, fill)

		for _, l := range c {
			if alpha.IsValid(l) {
				w[alpha.IndexOf(l)]++
			}
		}

		var max, maxi int
		for i, v := range w {
			if v > max {
				max, maxi = v, i
			}
		}

		return alphabet.QLetter{
			L: alpha.Letter(maxi),
			Q: alphabet.Ephred(1 - (float64(max) / float64(len(c)))),
		}
	}

	// DefaultQConsensus is a default ConsenseFunc function that takes letter
	// quality into account.
	// http://staden.sourceforge.net/manual/gap4_unix_120.html
	//
	// Every letter of alpha starts with a weight of 1. For each quality letter
	// in the column at pos whose letter is valid in alpha, the weight of that
	// letter is multiplied by 1-e and the weight of every other letter by
	// e/(alpha.Len()-1), where e is the value returned by the quality's ProbE
	// method. The weights are then normalised by their sum.
	//
	// If more than one letter has a normalised weight equal to, or within
	// FloatTolerance of, the highest weight, the result is alpha.Ambiguous()
	// with a quality of 0. Otherwise the highest-weighted letter is returned
	// with quality alphabet.Ephred(1 - weight).
	DefaultQConsensus ConsenseFunc = func(a Aligned, alpha alphabet.Alphabet, pos int, fill bool) alphabet.QLetter {
		w := make([]float64, alpha.Len())
		for i := range w {
			w[i] = 1
		}

		others := float64(alpha.Len() - 1)
		c := a.ColumnQL(pos, fill)
		for _, l := range c {
			if !alpha.IsValid(l.L) {
				continue
			}
			i, alt := alpha.IndexOf(l.L), l.Q.ProbE()
			p := 1 - alt
			// Spread the error probability evenly over the other letters.
			alt /= others
			for b := range w {
				if i == b {
					w[b] *= p
				} else {
					w[b] *= alt
				}
			}
		}

		var sum float64
		for _, p := range w {
			sum += p
		}

		// Normalise in place and locate the highest weight. The strict
		// comparison keeps the lowest index when weights are exactly equal.
		var (
			top  float64
			best int
		)
		for i := range w {
			w[i] /= sum
			if w[i] > top {
				top, best = w[i], i
			}
		}

		// Count ties only once the final maximum is known. Counting while the
		// maximum was still being searched for missed any earlier weight that
		// was within FloatTolerance below the final maximum, which made the
		// result depend on the order of letters in the alphabet.
		count := 0
		for _, v := range w {
			if v == top || math.Abs(top-v) < FloatTolerance {
				count++
			}
		}

		if count > 1 {
			return alphabet.QLetter{
				L: alpha.Ambiguous(),
				Q: 0,
			}
		}

		return alphabet.QLetter{
			L: alpha.Letter(best),
			Q: alphabet.Ephred(1 - top),
		}
	}
)

type Feature

// A Feature describes the basis for sequence features. It combines the
// method sets of feat.Feature and feat.Offsetter.
type Feature interface {
	feat.Feature
	feat.Offsetter
}

A Feature describes the basis for sequence features.

type QFilter

// A QFilter returns a letter based on an alphabet, quality letter and quality
// threshold. a is the alphabet, thresh the quality threshold and ql the
// quality letter being filtered.
type QFilter func(a alphabet.Alphabet, thresh alphabet.Qphred, ql alphabet.QLetter) alphabet.Letter

A QFilter returns a letter based on an alphabet, quality letter and quality threshold.

var (
	// AmbigFilter is a QFilter function that masks low quality letters: a
	// quality letter whose score is below thresh is replaced by the alphabet's
	// ambiguous position letter. Gap letters, and letters with a score equal
	// to or above thresh, are returned unchanged.
	AmbigFilter QFilter = func(a alphabet.Alphabet, thresh alphabet.Qphred, l alphabet.QLetter) alphabet.Letter {
		if l.L != a.Gap() && l.Q < thresh {
			return a.Ambiguous()
		}
		return l.L
	}

	// CaseFilter is a QFilter function that returns a lower case letter for quality letters
	// with a quality score below the specified threshold and upper case equal to or above the threshold.
	CaseFilter QFilter = func(a alphabet.Alphabet, thresh alphabet.Qphred, l alphabet.QLetter) alphabet.Letter {
		switch {
		case l.L == a.Gap():
			return l.L
		case l.Q >= thresh:
			return l.L &^ ('a' - 'A')
		}
		return l.L | ('a' - 'A')
	}
)

type Quality

// A Quality is a feature whose elements are Phred scores. It extends Scorer
// with the ability to copy itself.
type Quality interface {
	Scorer
	Copy() Quality // Return a copy of the Quality.
}

A Quality is a feature whose elements are Phred scores.

type RowAppender

// RowAppender is a type for sets of sequences or aligned multiple sequences
// that can append letters to individual or grouped sequences.
type RowAppender interface {
	Rower
	// NOTE(review): presumably a[i] is appended to row i — confirm against
	// implementations.
	AppendEach(a [][]alphabet.QLetter) (err error)
}

RowAppender is a type for sets of sequences or aligned multiple sequences that can append letters to individual or grouped sequences.

type Rower

// Rower describes the interface for sets of sequences or aligned multiple
// sequences.
type Rower interface {
	// NOTE(review): presumably the number of sequences held — confirm against
	// implementations.
	Rows() int
	// Row returns the Sequence for index i.
	// NOTE(review): valid range of i is not visible here; presumably
	// 0 <= i < Rows() — confirm.
	Row(i int) Sequence
}

Rower describes the interface for sets of sequences or aligned multiple sequences.

type Scorer

// A Scorer is a sequence type that provides Phred-based scoring information.
type Scorer interface {
	Feature
	EAt(int) float64                     // Return the p(Error) for a specific position.
	SetE(int, float64) error             // Set the p(Error) for a specific position.
	Encoding() alphabet.Encoding         // Return the score encoding scheme.
	SetEncoding(alphabet.Encoding) error // Set the score encoding scheme.
	QEncode(int) byte                    // Encode the quality at the specified position according to the encoding scheme.
}

A Scorer is a sequence type that provides Phred-based scoring information.

type Sequence

// A Sequence is a feature that stores sequence information. In addition to
// the methods listed here it includes the method sets of Feature, Slicer,
// Conformationer and ConformationSetter.
type Sequence interface {
	Feature
	At(int) alphabet.QLetter         // Return the letter at a specific position.
	Set(int, alphabet.QLetter) error // Set the letter at a specific position.
	Alphabet() alphabet.Alphabet     // Return the Alphabet being used.
	RevComp()                        // Reverse complement the sequence.
	Reverse()                        // Reverse the order of elements in the sequence.
	New() Sequence                   // Return a zero value of the sequence type, with the same alphabet.
	Clone() Sequence                 // Return a copy of the Sequence.
	CloneAnnotation() *Annotation    // Return a copy of the sequence's annotation.
	Slicer
	Conformationer
	ConformationSetter
}

A Sequence is a feature that stores sequence information.

type Slicer

// A Slicer returns and sets a Slice.
type Slicer interface {
	Slice() alphabet.Slice   // Return the alphabet.Slice.
	SetSlice(alphabet.Slice) // Set the alphabet.Slice.
}

A Slicer returns and sets a Slice.

type Strand

// Strand stores linear sequence strand information.
type Strand int8

Strand stores linear sequence strand information.

// Strand values. Because the first constant is iota - 1, Minus is -1, None is
// 0 (the zero value of Strand) and Plus is 1.
const (
	Minus Strand = iota - 1
	None
	Plus
)

func (Strand) String

func (s Strand) String() string

Directories

Path Synopsis
Package alignment handles aligned sequences stored as columns.
Package alignment handles aligned sequences stored as columns.
Package linear handles single sequences.
Package linear handles single sequences.
Package multi handles collections of sequences as alignments or sets.
Package multi handles collections of sequences as alignments or sets.
Package sequtils provides generic functions for manipulation of biogo/seq/...
Package sequtils provides generic functions for manipulation of biogo/seq/...

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL