packed

package
v0.0.0-...-25502c3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 9, 2012 License: GPL-3.0 Imports: 10 Imported by: 0

Documentation

Overview

Package packed provides support for manipulation of single nucleic acid sequences with and without quality data.

Two basic nucleic acid sequence types are provided, Seq and QSeq.

Index

Examples

Constants

This section is empty.

Variables

View Source
// LowQFilter is the default low-quality filter for QSeq. The letter passed in
// is ignored; the result is always the ambiguous letter of the sequence's
// alphabet. The sequence must be a *QSeq, otherwise the type assertion panics.
var LowQFilter = func(s seq.Sequence, _ alphabet.Letter) alphabet.Letter {
	qs := s.(*QSeq)
	return qs.alphabet.Ambiguous()
}

The default LowQFilter function for QSeq.

View Source
// QStringify is the default Stringify function for QSeq. Each element of S is
// split into a letter index (low two bits) and a quality (remaining high
// bits). A letter whose quality is above Threshold is emitted unchanged; any
// other letter is first passed through the sequence's LowQFilter.
// The polymer must be a *QSeq, otherwise the type assertion panics.
var QStringify = func(s seq.Polymer) string {
	qs := s.(*QSeq)
	out := make([]alphabet.Letter, len(qs.S))
	for i, packed := range qs.S {
		if alphabet.Qphred(packed>>2) > qs.Threshold {
			out[i] = qs.alphabet.Letter(int(packed & 0x3))
			continue
		}
		// Quality is at or below the threshold: let the filter decide
		// how the base is represented.
		out[i] = qs.LowQFilter(qs, qs.alphabet.Letter(int(packed&0x3)))
	}

	return alphabet.Letters(out).String()
}

The default Stringify function for QSeq.

View Source
// Stringify is the default Stringify function for Seq. It expands the packed
// letters back into one alphabet letter per base and returns them as a
// string. The polymer must be a *Seq, otherwise the type assertion panics.
var Stringify = func(s seq.Polymer) string {
	out := make([]alphabet.Letter, 0, s.Len())
	ps := s.(*Seq)
	packs := ps.S.Letters

	// unpack appends the letters stored in slots hi down to lo of p. Slot i
	// occupies the two bits starting at bit 2*i, so the highest slot holds
	// the earliest letter.
	unpack := func(p alphabet.Pack, hi, lo int) {
		for slot := hi; slot >= lo; slot-- {
			out = append(out, ps.alphabet.Letter(int(p>>(uint(slot)<<1)&0x3)))
		}
	}

	switch n := len(packs); {
	case n == 0:
		// Nothing is packed, so the result is the empty string.
	case n <= 2:
		// Short sequences are read letter by letter through At.
		for pos := ps.Start(); pos < ps.End(); pos++ {
			out = append(out, ps.At(seq.Position{Pos: pos}).L)
		}
	default:
		last := n - 1

		// First element: skip the LeftPad unused leading slots.
		unpack(packs[0], 3-int(ps.S.LeftPad), 0)

		// Interior elements are completely filled.
		for _, p := range packs[1:last] {
			unpack(p, 3, 0)
		}

		// Last element: stop before the RightPad unused trailing slots.
		unpack(packs[last], 3, int(ps.S.RightPad))
	}

	return alphabet.Letters(out).String()
}

The default Stringify function for Seq.

Functions

This section is empty.

Types

type Packing

// Packing holds bit-packed letters together with the padding offsets that
// mark how many letter slots are unused at each end of the packed run.
type Packing struct {
	Letters           []alphabet.Pack // Big-endian packing.
	// LeftPad is the number of unused slots at the start of the first element
	// of Letters and RightPad the number of unused slots at the end of the
	// last element, as read by the default Stringify function.
	LeftPad, RightPad int8
}

Packing is a type holding bit packed letters and padding offsets.

func PackLetters

func PackLetters(a alphabet.Nucleic, s ...alphabet.Letter) (p *Packing)

Pack bytes that conform to a into a slice of alphabet.Pack. Panics if a byte in s does not conform.

func PackQLetters

func PackQLetters(a alphabet.Nucleic, s ...alphabet.QLetter) (p *Packing)

Pack bytes that conform to a into a slice of alphabet.Pack. Panics if a byte in s does not conform.

func (*Packing) Align

func (self *Packing) Align(where int)

Align the Packing to the specified end.

type QSeq

// QSeq is a packed nucleic acid sequence that stores a Phred quality score
// alongside every base.
type QSeq struct {
	// ID is the sequence identifier; Name returns a pointer to it.
	ID         string
	// Desc is the free-text description; Description returns a pointer to it.
	Desc       string
	// Loc is the location string; Location returns a pointer to it.
	Loc        string
	// S holds one packed element per base. The default QStringify reads the
	// letter index from the low two bits and the quality from the bits above.
	S          []alphabet.QPack
	// Strand is the strand annotation of the sequence (see nucleic.Strand).
	Strand     nucleic.Strand
	Threshold  alphabet.Qphred // Threshold for returning valid letter.
	LowQFilter seq.Filter      // How to represent below threshold letter.
	Stringify  seq.Stringify   // Function allowing user specified string representation.
	Meta       interface{}     // No operation implicitly copies or changes the contents of Meta.
	// contains filtered or unexported fields
}

QSeq is a packed nucleic acid with Phred quality scores allowing one byte per quality base.

func NewQSeq

func NewQSeq(id string, qp []alphabet.QPack, alpha alphabet.Nucleic, encode alphabet.Encoding) (p *QSeq, err error)

Create a new QSeq with the given id, letter sequence, alphabet and quality encoding.

func (*QSeq) Alphabet

func (self *QSeq) Alphabet() alphabet.Alphabet

Return the Alphabet used by the sequence.

func (*QSeq) AppendLetters

func (self *QSeq) AppendLetters(a ...alphabet.Letter) (err error)

Append Letters to the sequence; the DefaultQphred value is used for quality scores.

func (*QSeq) AppendQLetters

func (self *QSeq) AppendQLetters(a ...alphabet.QLetter) (err error)

Append QLetters to the seq. Qualities are set to the default 0.

func (*QSeq) At

func (self *QSeq) At(pos seq.Position) alphabet.QLetter

Return the letter at position pos.

func (*QSeq) Circular

func (self *QSeq) Circular(c bool)

Specify that the sequence is circular.

func (*QSeq) Compose

func (self *QSeq) Compose(f feat.FeatureSet) (err error)

Join segments of the sequence, returning any error.

func (*QSeq) Copy

func (self *QSeq) Copy() seq.Sequence

Return a copy of the sequence.

func (*QSeq) Count

func (self *QSeq) Count() int

Satisfy Counter.

func (*QSeq) Description

func (self *QSeq) Description() *string

Description returns a pointer to the Desc string of the sequence.

func (*QSeq) EAt

func (self *QSeq) EAt(pos seq.Position) float64

Return the probability of a sequence error at position pos.

func (*QSeq) Encoding

func (self *QSeq) Encoding() alphabet.Encoding

Return the quality encoding type.

func (*QSeq) End

func (self *QSeq) End() int

Return the end position of the sequence in global coordinates.

func (*QSeq) IsCircular

func (self *QSeq) IsCircular() bool

Return whether the sequence is circular.

func (*QSeq) Join

func (self *QSeq) Join(p *QSeq, where int) (err error)

Join p to the sequence at the end specified by where.

func (*QSeq) Len

func (self *QSeq) Len() int

Return the length of the sequence.

func (*QSeq) Location

func (self *QSeq) Location() *string

Location returns a pointer to the Loc string of the sequence.

func (*QSeq) Moltype

func (self *QSeq) Moltype() bio.Moltype

Return the molecule type of the sequence.

func (*QSeq) Name

func (self *QSeq) Name() *string

Name returns a pointer to the ID string of the sequence.

func (*QSeq) Nucleic

func (self *QSeq) Nucleic()

Required to satisfy nucleic.Sequence interface.

func (*QSeq) Offset

func (self *QSeq) Offset(o int)

Set the global offset of the sequence to o.

func (*QSeq) QDecode

func (self *QSeq) QDecode(l byte) alphabet.Qphred

Decode a quality letter to a phred score based on the sequence encoding setting.

func (*QSeq) QEncode

func (self *QSeq) QEncode(pos seq.Position) byte

Encode the quality at position pos to a letter based on the sequence encoding setting.

func (*QSeq) Raw

func (self *QSeq) Raw() interface{}

Raw returns a pointer to the underlying []alphabet.QPack slice.

func (*QSeq) RevComp

func (self *QSeq) RevComp()

Reverse complement the sequence.

Example
// Quality scores, indexed in step with the letters of l below.
q := []alphabet.Qphred{
	2, 13, 19, 22, 19, 18, 20, 23, 23, 20, 16, 21, 24, 22, 22, 18, 17, 18, 22, 23, 22, 24, 22, 24, 20, 15,
	18, 18, 19, 19, 20, 12, 18, 17, 20, 20, 20, 18, 15, 18, 24, 21, 13, 8, 15, 20, 20, 19, 20, 20, 20, 18,
	16, 16, 16, 10, 15, 18, 18, 18, 11, 2, 11, 20, 19, 18, 18, 16, 10, 12, 22, 0, 0, 0, 0}
l := []alphabet.Letter("NTTTCTTCTATATCCTTTTCATCTTTTAATCCATTCACCATTTTTTTCCCTCCACCTACCTNTCCTTCTCTTTCT")
// Start from an empty DNA sequence using Sanger quality encoding; the bases
// are appended one at a time further down.
if s, err := NewQSeq("example DNA", nil, alphabet.DNA, alphabet.Sanger); err == nil {
	// Replace the default string form with a two-line rendering: the letters
	// on the first line and the encoded qualities on the second.
	s.Stringify = func(p seq.Polymer) string {
		s := p.(*QSeq)
		lb, qb, b := []alphabet.Letter{}, []byte{}, []byte{}
		for i, qp := range s.S {
			ql := qp.Unpack(s.Alphabet().(alphabet.Nucleic))
			if ql.Q > 2 {
				// Clearing bit 0x20 upper-cases an ASCII letter.
				ql.L &^= 0x20
			} else {
				// Bases with quality 2 or lower are shown as 'n'.
				ql.L = 'n'
			}
			lb = append(lb, ql.L)
			qb = append(qb, s.QEncode(seq.Position{Pos: i}))
		}
		b = append(b, alphabet.LettersToBytes(lb)...)
		b = append(b, '\n')
		b = append(b, qb...)
		return string(b)
	}

	// Append each base with its quality; the returned error is not checked
	// in this example.
	for i := range l {
		s.AppendQLetters(alphabet.QLetter{L: l[i], Q: q[i]})
	}
	fmt.Println("Forward:")
	fmt.Println(s)
	// RevComp modifies s in place, so the second print shows the reverse
	// complement with the quality line reversed to match.
	s.RevComp()
	fmt.Println("Reverse:")
	fmt.Println(s)
}
Output:

Forward:
nTTTCTTCTATATCCTTTTCATCTTTTAATCCATTCACCATTTTTTTCCCTCCACCTACCTnTCCTTCTCTnnnn
#.47435885169773237879795033445-3255530396.)05545553111+0333,#,54331+-7!!!!
Reverse:
nnnnAGAGAAGGAnAGGTAGGTGGAGGGAAAAAAATGGTGAATGGATTAAAAGATGAAAAGGATATAGAAGAAAn
!!!!7-+13345,#,3330+11135554550).6930355523-54433059797873237796158853474.#

func (*QSeq) Reverse

func (self *QSeq) Reverse()

Reverse the sequence.

func (*QSeq) Set

func (self *QSeq) Set(pos seq.Position, l alphabet.QLetter)

Set the letter at position pos to l.

func (*QSeq) SetE

func (self *QSeq) SetE(pos seq.Position, e float64)

Set the quality at position pos to e to reflect the given p(Error).

func (*QSeq) SetEncoding

func (self *QSeq) SetEncoding(e alphabet.Encoding)

Set the quality encoding type to e.

func (*QSeq) Start

func (self *QSeq) Start() int

Return the start position of the sequence in global coordinates.

func (*QSeq) Stitch

func (self *QSeq) Stitch(f feat.FeatureSet) (err error)

Join sequentially ordered disjunct segments of the sequence, returning any error.

func (*QSeq) String

func (self *QSeq) String() string

Return a string representation of the sequence. Representation is determined by the Stringify field.

func (*QSeq) Subseq

func (self *QSeq) Subseq(start int, end int) (sub seq.Sequence, err error)

Return a subsequence from start to end, wrapping if the sequence is circular.

func (*QSeq) Truncate

func (self *QSeq) Truncate(start int, end int) (err error)

Truncate the sequence from start to end, wrapping if the sequence is circular.

func (*QSeq) Unpack

func (self *QSeq) Unpack() (n *nucleic.Seq, q *quality.Phred)

Return an unpacked sequence and quality.

func (*QSeq) Validate

func (self *QSeq) Validate() (bool, int)

Validate the letters of the sequence according to the specified alphabet. This is always successful as encoding does not allow invalid letters.

type Seq

// Seq is a nucleic acid sequence packed four bases per byte.
type Seq struct {
	// ID is the sequence identifier; Name returns a pointer to it.
	ID        string
	// Desc is the free-text description; Description returns a pointer to it.
	Desc      string
	// Loc is the location string; Location returns a pointer to it.
	Loc       string
	// S is the packed letter data together with its padding offsets; Raw
	// returns this pointer.
	S         *Packing
	// Strand is the strand annotation of the sequence (see nucleic.Strand).
	Strand    nucleic.Strand
	Stringify seq.Stringify // Function allowing user specified string representation.
	Meta      interface{}   // No operation implicitly copies or changes the contents of Meta.
	// contains filtered or unexported fields
}

Seq is a nucleic sequence packed 4 bases per byte.

func NewSeq

func NewSeq(id string, b []alphabet.Letter, alpha alphabet.Nucleic) (p *Seq, err error)

Create a new Seq with the given id, letter sequence and alphabet.

func (*Seq) Alphabet

func (self *Seq) Alphabet() alphabet.Alphabet

Return the Alphabet used by the sequence.

func (*Seq) AppendLetters

func (self *Seq) AppendLetters(a ...alphabet.Letter) (err error)

Append Letters to the sequence.

func (*Seq) AppendQLetters

func (self *Seq) AppendQLetters(a ...alphabet.QLetter) (err error)

Append QLetters to the sequence.

func (*Seq) At

func (self *Seq) At(pos seq.Position) alphabet.QLetter

Return the letter at position pos.

func (*Seq) Circular

func (self *Seq) Circular(c bool)

Specify that the sequence is circular.

func (*Seq) Compose

func (self *Seq) Compose(f feat.FeatureSet) (err error)

Join segments of the sequence, returning any error.

Example
// Build a DNA sequence from mixed-case input; the printed form is lower case.
if s, err := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcagTAgtagtgaagtagggttagttta"), alphabet.DNA); err == nil {
	// Segments are given as [Start, End) ranges. As the output shows, Compose
	// concatenates them in the order listed, and ranges may overlap or repeat
	// parts of the sequence.
	f := feat.FeatureSet{
		&feat.Feature{Start: 0, End: 30},
		&feat.Feature{Start: 1, End: 8},
		&feat.Feature{Start: 28, End: 30},
		&feat.Feature{Start: 30, End: s.Len() - 1},
	}
	fmt.Println(s)
	// Compose rewrites s in place; the result is printed only on success.
	if err := s.Compose(f); err == nil {
		fmt.Println(s)
	}
}
Output:

aagtataagtcagtgcagtgtctggcagtagtagtgaagtagggttagttta
aagtataagtcagtgcagtgtctggcagtaagtataatagtagtgaagtagggttagttt

func (*Seq) Copy

func (self *Seq) Copy() seq.Sequence

Return a copy of the sequence.

func (*Seq) Count

func (self *Seq) Count() int

Satisfy Counter.

func (*Seq) Description

func (self *Seq) Description() *string

Description returns a pointer to the Desc string of the sequence.

func (*Seq) End

func (self *Seq) End() int

Return the end position of the sequence in global coordinates.

func (*Seq) IsCircular

func (self *Seq) IsCircular() bool

Return whether the sequence is circular.

func (*Seq) Join

func (self *Seq) Join(p *Seq, where int) (err error)

Join p to the sequence at the end specified by where.

Example
var (
	s1, s2 *Seq
	err    error
)

// First pair: join s2 onto the start of s1.
if s1, err = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA); err != nil {
	return
}
if s2, err = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA); err != nil {
	return
}
fmt.Println(s1, s2)
// With seq.Start the letters of s2 are placed before those of s1.
if err = s1.Join(s2, seq.Start); err == nil {
	fmt.Println(s1)
}

// Join modifies its receiver, so both sequences are rebuilt before joining
// at the other end.
if s1, err = NewSeq("a", []alphabet.Letter("agctgtgctga"), alphabet.DNA); err != nil {
	return
}
if s2, err = NewSeq("b", []alphabet.Letter("CGTGCAGTCATGAGTGA"), alphabet.DNA); err != nil {
	return
}
// With seq.End the letters of s2 are placed after those of s1.
if err = s1.Join(s2, seq.End); err == nil {
	fmt.Println(s1)
}
Output:

agctgtgctga cgtgcagtcatgagtga
cgtgcagtcatgagtgaagctgtgctga
agctgtgctgacgtgcagtcatgagtga

func (*Seq) Len

func (self *Seq) Len() int

Return the length of the sequence.

func (*Seq) Location

func (self *Seq) Location() *string

Location returns a pointer to the Loc string of the sequence.

func (*Seq) Moltype

func (self *Seq) Moltype() bio.Moltype

Return the molecule type of the sequence.

func (*Seq) Name

func (self *Seq) Name() *string

Name returns a pointer to the ID string of the sequence.

func (*Seq) Nucleic

func (self *Seq) Nucleic()

Required to satisfy nucleic.Sequence interface.

func (*Seq) Offset

func (self *Seq) Offset(o int)

Set the global offset of the sequence to o.

func (*Seq) Raw

func (self *Seq) Raw() interface{}

Raw returns the underlying *Packing struct pointer.

func (*Seq) RevComp

func (self *Seq) RevComp()

Reverse complement the sequence.

func (*Seq) Reverse

func (self *Seq) Reverse()

Reverse the sequence.

func (*Seq) Set

func (self *Seq) Set(pos seq.Position, l alphabet.QLetter)

Set the letter at position pos to l.

func (*Seq) Start

func (self *Seq) Start() int

Return the start position of the sequence in global coordinates.

func (*Seq) Stitch

func (self *Seq) Stitch(f feat.FeatureSet) (err error)

Join sequentially ordered disjunct segments of the sequence, returning any error.

Example
// Build a DNA sequence from mixed-case input; the printed form is lower case.
if s, err := NewSeq("example DNA", []alphabet.Letter("aAGTATAAgtcagtgcagtgtctggcagTGCTCGTGCgtagtgaagtagGGTTAGTTTa"), alphabet.DNA); err == nil {
	// Three non-overlapping [Start, End) ranges in ascending order. As the
	// output shows, Stitch keeps only these ranges and joins them end to end.
	f := feat.FeatureSet{
		&feat.Feature{Start: 1, End: 8},
		&feat.Feature{Start: 28, End: 37},
		&feat.Feature{Start: 49, End: s.Len() - 1},
	}
	fmt.Println(s)
	// Stitch rewrites s in place; the result is printed only on success.
	if err := s.Stitch(f); err == nil {
		fmt.Println(s)
	}
}
Output:

aagtataagtcagtgcagtgtctggcagtgctcgtgcgtagtgaagtagggttagttta
agtataatgctcgtgcggttagttt

func (*Seq) String

func (self *Seq) String() string

Return a string representation of the sequence. Representation is determined by the Stringify field.

func (*Seq) Subseq

func (self *Seq) Subseq(start int, end int) (sub seq.Sequence, err error)

Return a subsequence from start to end, wrapping if the sequence is circular.

func (*Seq) Truncate

func (self *Seq) Truncate(start int, end int) (err error)

Truncate the sequence from start to end, wrapping if the sequence is circular.

func (*Seq) Unpack

func (self *Seq) Unpack() (n *nucleic.Seq)

Return an unpacked sequence.

func (*Seq) Validate

func (self *Seq) Validate() (bool, int)

Validate the letters of the sequence according to the specified alphabet. This is always successful as encoding does not allow invalid letters.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL