Documentation
¶
Overview ¶
Package codon is a package for optimizing codons for expression when synthesizing DNA.
This package contains almost everything you need to do standard codon optimization.
Biological context: certain cells favor certain codons and will reject or under-express sequences that don't use a similar ratio of codons. This is called codon bias: https://en.wikipedia.org/wiki/Codon_usage_bias
Furthermore, different ribosomes in different organisms will interpret codons differently. What may be a start codon for one ribosome may be a stop in the other. Heck, apparently nucleomorphs contain 4 different kinds of ribosomes. https://en.wikipedia.org/wiki/Nucleomorph <- Thanks Keoni for mentioning this example!
TTFN, Tim
Index ¶
- func WriteCodonJSON(codonTable *TranslationTable, path string)
- type AminoAcid
- type Codon
- type Stats
- type Table
- type TranslationTable
- func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*TranslationTable, error)
- func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, cutOff float64) (*TranslationTable, error)
- func NewTranslationTable(index int) *TranslationTable
- func ParseCodonJSON(file []byte) *TranslationTable
- func ReadCodonJSON(path string) *TranslationTable
- func (table *TranslationTable) Copy() *TranslationTable
- func (table *TranslationTable) GetWeightedAminoAcids() []AminoAcid
- func (table *TranslationTable) Optimize(aminoAcids string, randomState ...int) (string, error)
- func (table *TranslationTable) Translate(dnaSeq string) (string, error)
- func (table *TranslationTable) UpdateWeights(aminoAcids []AminoAcid) error
- func (table *TranslationTable) UpdateWeightsWithSequence(data genbank.Genbank) error
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func WriteCodonJSON ¶
func WriteCodonJSON(codonTable *TranslationTable, path string)
WriteCodonJSON writes a codonTable struct out to JSON.
Example ¶
package main
import (
"fmt"
"os"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main round-trips a codon table through JSON: it loads the reference table,
// writes it to a scratch file, reads the scratch file back, deletes it, and
// prints the weight of the first codon of the first weighted amino acid.
func main() {
	const (
		sourcePath  = "../../data/bsub_codon_test.json"
		scratchPath = "../../data/codon_test.json"
	)

	original := codon.ReadCodonJSON(sourcePath)
	codon.WriteCodonJSON(original, scratchPath)
	roundTripped := codon.ReadCodonJSON(scratchPath)

	// Best-effort cleanup of the scratch file; done before printing so no
	// test data is left behind.
	os.Remove(scratchPath)

	firstAminoAcid := roundTripped.GetWeightedAminoAcids()[0]
	fmt.Println(firstAminoAcid.Codons[0].Weight)
}
Output: 28327
Types ¶
type Codon ¶
// Codon pairs a codon triplet with an integer weight.
type Codon struct {
// Triplet is the codon triplet string, serialized under the JSON key "triplet".
Triplet string `json:"triplet"`
// Weight is the codon's weight, serialized under the JSON key "weight".
Weight int `json:"weight"` // needs to be set to 1 for random chooser
}
Codon holds the information for a single codon triplet.
type Stats ¶ added in v0.28.0
Stats denotes a set of statistics we maintain throughout the translation table's lifetime. For example, we track the start codons observed when we update the codon table's weights with other DNA sequences.
type Table ¶
// Table lists the operations a (potentially codon-optimized) translation table provides.
type Table interface {
// GetWeightedAminoAcids returns the amino acids along with their associated codon weights.
GetWeightedAminoAcids() []AminoAcid
// Optimize returns codons that encode the given amino acid sequence, or an error.
// randomState is optional. NOTE(review): presumably a seed for the weighted codon picker; confirm.
Optimize(aminoAcids string, randomState ...int) (string, error)
// Translate returns the amino acid sequence that the given DNA sequence yields, or an error.
Translate(dnaSeq string) (string, error)
}
Table is an interface encompassing what a potentially codon-optimized translation table can do.
type TranslationTable ¶ added in v0.28.0
// TranslationTable is a weighted codon table used when translating and optimizing sequences.
// Only StartCodons, StopCodons and AminoAcids carry JSON tags.
type TranslationTable struct {
// StartCodons is serialized under the JSON key "start_codons".
StartCodons []string `json:"start_codons"`
// StopCodons is serialized under the JSON key "stop_codons".
StopCodons []string `json:"stop_codons"`
// AminoAcids holds the amino acids with their codons; serialized under the JSON key "amino_acids".
AminoAcids []AminoAcid `json:"amino_acids"`
// NOTE(review): presumably maps a codon triplet to its amino acid letter; confirm.
TranslationMap map[string]string
// NOTE(review): presumably maps start codons to the amino acid they encode; confirm.
StartCodonTable map[string]string
// NOTE(review): presumably one weighted chooser per amino acid letter; confirm.
Choosers map[string]weightedRand.Chooser
// Stats points to the statistics tracked for this table.
Stats *Stats
}
TranslationTable contains a weighted codon table, which is used when translating and optimizing sequences. The weights can be updated through the codon frequencies we observe in given DNA sequences.
func AddCodonTable ¶
func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*TranslationTable, error)
AddCodonTable takes 2 CodonTables and adds them together to create a new codonTable.
Example ¶
package main
import (
"fmt"
"github.com/TimothyStiles/poly/io/genbank"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main builds two translation tables (NCBI table index 11), weights one from
// ../../data/puc19.gbk and the other from ../../data/phix174.gb, adds them
// together with AddCodonTable, and prints the resulting weight of the GGC
// codon.
//
// Every failure panics: a table built from an unreadable GenBank file would
// make the printed weight meaningless, so read errors are treated the same
// way as the weight-update errors.
func main() {
	sequence, err := genbank.Read("../../data/puc19.gbk")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	// weight our codon optimization table using the regions we collected from the genbank file above
	optimizationTable := codon.NewTranslationTable(11)
	err = optimizationTable.UpdateWeightsWithSequence(sequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	sequence2, err := genbank.Read("../../data/phix174.gb")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	optimizationTable2 := codon.NewTranslationTable(11)
	err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	finalTable, err := codon.AddCodonTable(optimizationTable, optimizationTable2)
	if err != nil {
		panic(fmt.Errorf("got error in adding codon table example: %w", err))
	}

	for _, aa := range finalTable.AminoAcids {
		// Named weightedCodon so the loop variable does not shadow the codon package.
		for _, weightedCodon := range aa.Codons {
			if weightedCodon.Triplet == "GGC" {
				fmt.Println(weightedCodon.Weight)
			}
		}
	}
}
Output: 90
func CompromiseCodonTable ¶
func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, cutOff float64) (*TranslationTable, error)
CompromiseCodonTable takes 2 CodonTables and makes a new codonTable that is an equal compromise between the two tables.
Example ¶
package main
import (
"fmt"
"github.com/TimothyStiles/poly/io/genbank"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main builds two translation tables (NCBI table index 11), weights one from
// ../../data/puc19.gbk and the other from ../../data/phix174.gb, combines them
// with CompromiseCodonTable using a cut-off of 0.1, and prints the resulting
// weight of the TAA codon.
//
// Every failure panics. In particular the error from CompromiseCodonTable is
// checked before the returned table is used, instead of dereferencing a table
// that may not have been built.
func main() {
	sequence, err := genbank.Read("../../data/puc19.gbk")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	// weight our codon optimization table using the regions we collected from the genbank file above
	optimizationTable := codon.NewTranslationTable(11)
	err = optimizationTable.UpdateWeightsWithSequence(sequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	sequence2, err := genbank.Read("../../data/phix174.gb")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	optimizationTable2 := codon.NewTranslationTable(11)
	err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	finalTable, err := codon.CompromiseCodonTable(optimizationTable, optimizationTable2, 0.1)
	if err != nil {
		panic(fmt.Errorf("got error in compromising codon table example: %w", err))
	}

	for _, aa := range finalTable.GetWeightedAminoAcids() {
		// Named weightedCodon so the loop variable does not shadow the codon package.
		for _, weightedCodon := range aa.Codons {
			if weightedCodon.Triplet == "TAA" {
				fmt.Println(weightedCodon.Weight)
			}
		}
	}
}
Output: 2727
func NewTranslationTable ¶ added in v0.28.0
func NewTranslationTable(index int) *TranslationTable
NewTranslationTable takes the index of the desired NCBI codon table and returns the corresponding translation table.
func ParseCodonJSON ¶
func ParseCodonJSON(file []byte) *TranslationTable
ParseCodonJSON parses a codonTable JSON file.
Example ¶
package main
import (
"fmt"
"os"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main reads the raw bytes of ../../data/bsub_codon_test.json, parses them
// with ParseCodonJSON, and prints the weight of the first codon of the first
// weighted amino acid.
//
// A failed read panics with the underlying error rather than handing empty
// input to the parser.
func main() {
	file, err := os.ReadFile("../../data/bsub_codon_test.json")
	if err != nil {
		panic(fmt.Errorf("reading codon table JSON: %w", err))
	}

	codontable := codon.ParseCodonJSON(file)
	fmt.Println(codontable.GetWeightedAminoAcids()[0].Codons[0].Weight)
}
Output: 28327
func ReadCodonJSON ¶
func ReadCodonJSON(path string) *TranslationTable
ReadCodonJSON reads a codonTable JSON file.
Example ¶
package main
import (
"fmt"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main loads a codon table from ../../data/bsub_codon_test.json and prints the
// weight of the first codon of the first weighted amino acid.
func main() {
	table := codon.ReadCodonJSON("../../data/bsub_codon_test.json")

	weightedAminoAcids := table.GetWeightedAminoAcids()
	firstCodon := weightedAminoAcids[0].Codons[0]

	fmt.Println(firstCodon.Weight)
}
Output: 28327
func (*TranslationTable) Copy ¶ added in v0.28.0
func (table *TranslationTable) Copy() *TranslationTable
Copy returns a deep copy of the translation table. This is to prevent an unintended update of data used in another process, since the tables are generated at build time.
func (*TranslationTable) GetWeightedAminoAcids ¶ added in v0.28.0
func (table *TranslationTable) GetWeightedAminoAcids() []AminoAcid
GetWeightedAminoAcids returns the amino acids along with their associated codon weights.
func (*TranslationTable) Optimize ¶ added in v0.29.0
func (table *TranslationTable) Optimize(aminoAcids string, randomState ...int) (string, error)
Optimize will return a set of codons which can be used to encode the given amino acid sequence. The codons picked are weighted according to the computed translation table's weights.
Example ¶
package main
import (
"fmt"
"github.com/TimothyStiles/poly/io/genbank"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main weights a translation table (NCBI table index 11) from
// ../../data/puc19.gbk, optimizes the GFP protein sequence with it, translates
// the optimized DNA back, and prints whether the round trip reproduces the
// original protein sequence.
//
// Errors are deliberately discarded to keep the example short; a failure
// shows up as the final comparison printing false.
func main() {
	gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"

	sequence, _ := genbank.Read("../../data/puc19.gbk")
	codonTable := codon.NewTranslationTable(11)
	_ = codonTable.UpdateWeightsWithSequence(sequence)

	// Sanity check: the summed weights of the stop codons ("*") should equal
	// the number of genes counted while updating the weights.
	stopCodonCount := 0
	for _, aminoAcid := range codonTable.AminoAcids {
		if aminoAcid.Letter != "*" {
			continue
		}
		for _, stopCodon := range aminoAcid.Codons {
			stopCodonCount += stopCodon.Weight
		}
	}
	if codonTable.Stats.GeneCount != stopCodonCount {
		fmt.Println("Stop codons don't equal number of genes!")
	}

	optimizedSequence, _ := codonTable.Optimize(gfpTranslation)
	roundTrip, _ := codonTable.Translate(optimizedSequence)
	fmt.Println(roundTrip == gfpTranslation)
}
Output: true
func (*TranslationTable) Translate ¶ added in v0.28.0
func (table *TranslationTable) Translate(dnaSeq string) (string, error)
Translate will return the amino acid sequence that the given DNA sequence will yield.
Example ¶
package main
import (
"fmt"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main translates a GFP DNA sequence with NCBI translation table 11 and prints
// whether the result matches the expected GFP protein sequence.
//
// The translation error is deliberately discarded to keep the example short;
// a failure shows up as the comparison printing false.
func main() {
	gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
	gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"

	// The table index selects which codons map to which amino acids, per the
	// NCBI codon tables.
	translationTable := codon.NewTranslationTable(11)
	testTranslation, _ := translationTable.Translate(gfpDnaSequence)

	fmt.Println(gfpTranslation == testTranslation)
}
Output: true
func (*TranslationTable) UpdateWeights ¶ added in v0.28.0
func (table *TranslationTable) UpdateWeights(aminoAcids []AminoAcid) error
UpdateWeights will update the translation table's codon pickers with the given amino acid codon weights.
Example ¶
package main
import (
"fmt"
"github.com/TimothyStiles/poly/synthesis/codon"
)
// main applies hand-picked codon weights for arginine ("R") to NCBI
// translation table 11, optimizes the GFP protein sequence with random state
// 1, and prints whether the result equals the expected DNA sequence.
func main() {
	gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
	sequenceWithCustomWeights := "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA"

	// Custom weights for the arginine codons. Supply your own weights like
	// this when you do not want them computed for you by
	// `UpdateWeightsWithSequence`.
	arginine := codon.AminoAcid{
		Letter: "R",
		Codons: []codon.Codon{
			{Triplet: "CGU", Weight: 1},
			{Triplet: "CGA", Weight: 2},
			{Triplet: "CGG", Weight: 4},
			{Triplet: "AGA", Weight: 6},
			{Triplet: "AGG", Weight: 2},
		},
	}

	table := codon.NewTranslationTable(11)
	if err := table.UpdateWeights([]codon.AminoAcid{arginine}); err != nil {
		fmt.Println("Could not update weights in example")
	}

	// The optimization error is deliberately discarded; a failure shows up as
	// the comparison below printing false.
	optimizedSequence, _ := table.Optimize(gfpTranslation, 1)
	fmt.Println(optimizedSequence == sequenceWithCustomWeights)
}
Output: true
func (*TranslationTable) UpdateWeightsWithSequence ¶ added in v0.28.0
func (table *TranslationTable) UpdateWeightsWithSequence(data genbank.Genbank) error
UpdateWeightsWithSequence will look at the coding regions in the given genbank data, and use those to generate new weights for the codons in the translation table. The next time a sequence is optimized, it will use those updated weights.
This can be used, for example, to figure out which DNA sequence is needed to give the best yield of protein when trying to express a protein across different species.