codon

package
v0.29.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 1, 2023 License: MIT Imports: 9 Imported by: 0

Documentation

Overview

Package codon is a package for optimizing codons for expression when synthesizing DNA.

This package contains almost everything you need to do standard codon optimization.

Biological context: certain cells favor certain codons and will reject or under-express sequences that don't use a similar ratio of codons. This is called codon usage bias: https://en.wikipedia.org/wiki/Codon_usage_bias

Furthermore, different ribosomes in different organisms will interpret codons differently. What may be a start codon for one ribosome may be a stop codon for another. Heck, apparently nucleomorphs contain 4 different kinds of ribosomes. https://en.wikipedia.org/wiki/Nucleomorph <- Thanks Keoni for mentioning this example!

TTFN, Tim

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func WriteCodonJSON

func WriteCodonJSON(codonTable *TranslationTable, path string)

WriteCodonJSON writes a TranslationTable out as JSON to the file at the given path.

Example
package main

import (
	"fmt"
	"os"

	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main round-trips a codon table through JSON: it loads a table, writes it to
// a scratch file, reads that file back, removes it, and prints the weight of
// the first codon of the first weighted amino acid.
func main() {
	const (
		inputPath   = "../../data/bsub_codon_test.json"
		scratchPath = "../../data/codon_test.json"
	)

	original := codon.ReadCodonJSON(inputPath)
	codon.WriteCodonJSON(original, scratchPath)
	roundTripped := codon.ReadCodonJSON(scratchPath)

	// cleaning up test data
	os.Remove(scratchPath)

	firstCodon := roundTripped.GetWeightedAminoAcids()[0].Codons[0]
	fmt.Println(firstCodon.Weight)
}
Output:

28327

Types

type AminoAcid

// AminoAcid holds an amino acid together with its related codons.
type AminoAcid struct {
	// Letter identifies the amino acid; the examples on this page use "R" for
	// arginine and "*" for the stop codons. Serialized as "letter".
	Letter string  `json:"letter"`
	// Codons lists the codons related to this amino acid. Serialized as "codons".
	Codons []Codon `json:"codons"`
}

AminoAcid holds information for an amino acid and related codons in a struct

type Codon

// Codon holds a codon triplet together with its weight.
type Codon struct {
	// Triplet is the codon string (e.g. "GGC" or "TAA" in the examples on this
	// page). Serialized as "triplet".
	Triplet string `json:"triplet"`
	// Weight is the integer weight attached to the triplet. Serialized as "weight".
	Weight  int    `json:"weight"` // needs to be set to 1 for random chooser
}

Codon holds information for a codon triplet in a struct

type Stats added in v0.28.0

// Stats holds statistics maintained throughout a translation table's lifetime.
type Stats struct {
	// StartCodonCount holds integer counts keyed by string.
	// NOTE(review): presumably start codon triplet -> number of times observed;
	// confirm against the code that updates it.
	StartCodonCount map[string]int
	// GeneCount is an integer gene counter. The Optimize example checks that it
	// equals the summed weights of the "*" codons after a weight update.
	GeneCount       int
}

Stats denotes a set of statistics we maintain throughout the translation table's lifetime. For example, we track the start codons observed when we update the codon table's weights with other DNA sequences.

func NewStats added in v0.28.0

func NewStats() *Stats

NewStats returns a new instance of codon statistics (a set of statistics we maintain throughout a translation table's lifetime)

type Table

// Table describes what a potentially codon-optimized translation table can do.
type Table interface {
	// GetWeightedAminoAcids returns the amino acids along with their
	// associated codon weights.
	GetWeightedAminoAcids() []AminoAcid
	// Optimize returns a set of codons which can be used to encode the given
	// amino acid sequence.
	// NOTE(review): randomState is optional; the UpdateWeights example passes 1,
	// presumably as a seed for reproducible output — confirm.
	Optimize(aminoAcids string, randomState ...int) (string, error)
	// Translate returns the amino acid sequence which the given DNA will yield.
	Translate(dnaSeq string) (string, error)
}

Table is an interface encompassing what a potentially codon optimized Translation table can do

type TranslationTable added in v0.28.0

// TranslationTable is a weighted codon table used when translating and
// optimizing sequences.
type TranslationTable struct {
	// Fields with explicit JSON names: the start codons, the stop codons, and
	// the amino acids with their weighted codons.
	StartCodons []string    `json:"start_codons"`
	StopCodons  []string    `json:"stop_codons"`
	AminoAcids  []AminoAcid `json:"amino_acids"`

	// Lookup structures; these carry no explicit JSON tags.
	// NOTE(review): presumably TranslationMap maps codon triplet -> amino acid
	// letter, StartCodonTable does the same for start codons, and Choosers maps
	// amino acid letter -> weighted codon chooser; confirm against the constructor.
	TranslationMap  map[string]string
	StartCodonTable map[string]string
	Choosers        map[string]weightedRand.Chooser

	// Stats points to the statistics tracked for this table.
	Stats *Stats
}

TranslationTable contains a weighted codon table, which is used when translating and optimizing sequences. The weights can be updated through the codon frequencies we observe in given DNA sequences.

func AddCodonTable

func AddCodonTable(firstCodonTable, secondCodonTable *TranslationTable) (*TranslationTable, error)

AddCodonTable takes two TranslationTables and adds them together to create a new TranslationTable.

Example
package main

import (
	"fmt"

	"github.com/TimothyStiles/poly/io/genbank"
	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main weights two codon tables from two genbank files, adds the tables
// together, and prints the weight of every codon in the result whose triplet
// is "GGC". Any failure along the way panics so the example cannot silently
// print a misleading value.
func main() {
	sequence, err := genbank.Read("../../data/puc19.gbk")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	// weight our codon optimization table using the regions we collected from the genbank file above
	optimizationTable := codon.NewTranslationTable(11)
	err = optimizationTable.UpdateWeightsWithSequence(sequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	sequence2, err := genbank.Read("../../data/phix174.gb")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}
	optimizationTable2 := codon.NewTranslationTable(11)
	err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	finalTable, err := codon.AddCodonTable(optimizationTable, optimizationTable2)
	if err != nil {
		panic(fmt.Errorf("got error in adding codon table example: %w", err))
	}

	for _, aa := range finalTable.AminoAcids {
		// The loop variable is deliberately not called "codon" so it does not
		// shadow the imported codon package.
		for _, weighted := range aa.Codons {
			if weighted.Triplet == "GGC" {
				fmt.Println(weighted.Weight)
			}
		}
	}
}
Output:

90

func CompromiseCodonTable

func CompromiseCodonTable(firstCodonTable, secondCodonTable *TranslationTable, cutOff float64) (*TranslationTable, error)

CompromiseCodonTable takes two TranslationTables and makes a new TranslationTable that is an equal compromise between the two tables.

Example
package main

import (
	"fmt"

	"github.com/TimothyStiles/poly/io/genbank"
	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main weights two codon tables from two genbank files, builds a compromise
// table from them with a cutOff of 0.1, and prints the weight of every codon
// in the result whose triplet is "TAA". Errors panic rather than being
// discarded, so a failed step cannot lead to a nil table being dereferenced.
func main() {
	sequence, err := genbank.Read("../../data/puc19.gbk")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	// weight our codon optimization table using the regions we collected from the genbank file above
	optimizationTable := codon.NewTranslationTable(11)
	err = optimizationTable.UpdateWeightsWithSequence(sequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	sequence2, err := genbank.Read("../../data/phix174.gb")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}
	optimizationTable2 := codon.NewTranslationTable(11)
	err = optimizationTable2.UpdateWeightsWithSequence(sequence2)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	finalTable, err := codon.CompromiseCodonTable(optimizationTable, optimizationTable2, 0.1)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	for _, aa := range finalTable.GetWeightedAminoAcids() {
		// The loop variable is deliberately not called "codon" so it does not
		// shadow the imported codon package.
		for _, weighted := range aa.Codons {
			if weighted.Triplet == "TAA" {
				fmt.Println(weighted.Weight)
			}
		}
	}
}
Output:

2727

func NewTranslationTable added in v0.28.0

func NewTranslationTable(index int) *TranslationTable

NewTranslationTable takes the index of the desired NCBI codon table and returns that table.

func ParseCodonJSON

func ParseCodonJSON(file []byte) *TranslationTable

ParseCodonJSON parses a codonTable JSON file.

Example
package main

import (
	"fmt"
	"os"

	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main reads a codon table JSON file into memory, parses it, and prints the
// weight of the first codon of the first weighted amino acid. A failed read
// panics instead of handing empty bytes to the parser.
func main() {
	file, err := os.ReadFile("../../data/bsub_codon_test.json")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}
	codontable := codon.ParseCodonJSON(file)

	fmt.Println(codontable.GetWeightedAminoAcids()[0].Codons[0].Weight)
}
Output:

28327

func ReadCodonJSON

func ReadCodonJSON(path string) *TranslationTable

ReadCodonJSON reads a codonTable JSON file.

Example
package main

import (
	"fmt"

	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main loads a codon table from a JSON file and prints the weight of the
// first codon of the first weighted amino acid.
func main() {
	const tablePath = "../../data/bsub_codon_test.json"

	table := codon.ReadCodonJSON(tablePath)
	aminoAcids := table.GetWeightedAminoAcids()
	firstCodon := aminoAcids[0].Codons[0]

	fmt.Println(firstCodon.Weight)
}
Output:

28327

func (*TranslationTable) Copy added in v0.28.0

func (table *TranslationTable) Copy() *TranslationTable

Copy returns a deep copy of the translation table. This is to prevent an unintended update of data used in another process, since the tables are generated at build time.

func (*TranslationTable) GetWeightedAminoAcids added in v0.28.0

func (table *TranslationTable) GetWeightedAminoAcids() []AminoAcid

GetWeightedAminoAcids returns the amino acids along with their associated codon weights

func (*TranslationTable) Optimize added in v0.29.0

func (table *TranslationTable) Optimize(aminoAcids string, randomState ...int) (string, error)

Optimize will return a set of codons which can be used to encode the given amino acid sequence. The codons picked are weighted according to the computed translation table's weights

Example
package main

import (
	"fmt"

	"github.com/TimothyStiles/poly/io/genbank"
	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main weights a codon table from a genbank file, optimizes a GFP amino acid
// sequence with it, translates the optimized DNA back, and prints whether the
// round trip reproduces the original amino acid sequence. Errors panic rather
// than being discarded, so a failed step cannot masquerade as a "false" result.
func main() {
	gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"

	sequence, err := genbank.Read("../../data/puc19.gbk")
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}
	codonTable := codon.NewTranslationTable(11)
	err = codonTable.UpdateWeightsWithSequence(sequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	// Here, we double check if the number of genes is equal to the number of stop codons
	stopCodonCount := 0
	for _, aa := range codonTable.AminoAcids {
		if aa.Letter != "*" {
			continue
		}
		// The loop variable is deliberately not called "codon" so it does not
		// shadow the imported codon package.
		for _, stop := range aa.Codons {
			stopCodonCount += stop.Weight
		}
	}

	if stopCodonCount != codonTable.Stats.GeneCount {
		fmt.Println("Stop codons don't equal number of genes!")
	}

	optimizedSequence, err := codonTable.Optimize(gfpTranslation)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}
	optimizedSequenceTranslation, err := codonTable.Translate(optimizedSequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	fmt.Println(optimizedSequenceTranslation == gfpTranslation)
}
Output:

true

func (*TranslationTable) Translate added in v0.28.0

func (table *TranslationTable) Translate(dnaSeq string) (string, error)

Translate will return an amino acid sequence which the given DNA will yield

Example
package main

import (
	"fmt"

	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main translates a GFP DNA sequence with NCBI codon table 11 and prints
// whether the result equals the expected amino acid sequence. A translation
// error panics instead of being discarded and reported as a mismatch.
func main() {
	gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
	gfpDnaSequence := "ATGGCTAGCAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCTACATACGGAAAGCTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCCCGTTATCCGGATCATATGAAACGGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAACGCACTATATCTTTCAAAGATGACGGGAACTACAAGACGCGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATCGTATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTCGGACACAAACTCGAGTACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTCGCCACAACATTGAAGATGGATCCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCGACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGCGTGACCACATGGTCCTTCTTGAGTTTGTAACTGCTGCTGGGATTACACATGGCATGGATGAGCTCTACAAATAA"

	// need to specify which codons map to which amino acids per NCBI table
	testTranslation, err := codon.NewTranslationTable(11).Translate(gfpDnaSequence)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	fmt.Println(gfpTranslation == testTranslation)
}
Output:

true

func (*TranslationTable) UpdateWeights added in v0.28.0

func (table *TranslationTable) UpdateWeights(aminoAcids []AminoAcid) error

UpdateWeights will update the translation table's codon pickers with the given amino acid codon weights

Example
package main

import (
	"fmt"

	"github.com/TimothyStiles/poly/synthesis/codon"
)

// main supplies hand-picked codon weights for arginine ("R") to NCBI codon
// table 11, optimizes a GFP amino acid sequence with random state 1, and prints
// whether the result equals a known DNA sequence. An Optimize error panics
// instead of being discarded and reported as a mismatch.
func main() {
	gfpTranslation := "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*"
	sequenceWithCustomWeights := "ATGGCAAGTAAGGGAGAAGAGCTTTTTACCGGCGTAGTACCAATTCTGGTAGAACTGGATGGTGATGTAAACGGTCACAAATTTAGTGTAAGCGGAGAAGGTGAGGGTGATGCTACCTATGGCAAACTGACCCTAAAGTTTATATGCACGACTGGAAAACTTCCGGTACCGTGGCCAACGTTAGTTACAACGTTTTCTTATGGAGTACAGTGCTTCAGCCGCTACCCAGATCATATGAAACGCCATGATTTCTTTAAGAGCGCCATGCCAGAGGGTTATGTTCAGGAGCGCACGATCTCGTTTAAGGATGATGGTAACTATAAGACTCGTGCTGAGGTGAAGTTCGAAGGCGATACCCTTGTAAATCGTATTGAATTGAAGGGTATAGACTTCAAGGAGGATGGAAATATTCTTGGACATAAGCTGGAATACAATTACAATTCACATAACGTTTATATAACTGCCGACAAGCAAAAAAACGGGATAAAAGCTAATTTTAAAATACGCCACAACATAGAGGACGGGTCGGTGCAACTAGCCGATCATTATCAACAAAACACACCAATCGGCGACGGACCAGTTCTGTTGCCCGATAATCATTACTTATCAACCCAAAGTGCCTTAAGTAAGGATCCGAACGAAAAGCGCGATCATATGGTACTTCTTGAGTTTGTTACCGCTGCAGGCATAACGCATGGCATGGACGAGCTATACAAATAA"

	table := codon.NewTranslationTable(11)

	// this example is using custom weights for different codons for Arginine. Use this if you would rather use your own
	// codon weights, they can also be computed for you with `UpdateWeightsWithSequence`.
	//
	// NOTE(review): "CGU" is spelled with U while the sequences in this example
	// are spelled with T — confirm this triplet is intended.
	err := table.UpdateWeights([]codon.AminoAcid{
		{
			Letter: "R",
			Codons: []codon.Codon{
				{Triplet: "CGU", Weight: 1},
				{Triplet: "CGA", Weight: 2},
				{Triplet: "CGG", Weight: 4},
				{Triplet: "AGA", Weight: 6},
				{Triplet: "AGG", Weight: 2},
			},
		},
	})
	if err != nil {
		fmt.Println("Could not update weights in example")
	}

	optimizedSequence, err := table.Optimize(gfpTranslation, 1)
	if err != nil {
		panic(fmt.Errorf("got unexpected error in an example: %w", err))
	}

	fmt.Println(optimizedSequence == sequenceWithCustomWeights)
}
Output:

true

func (*TranslationTable) UpdateWeightsWithSequence added in v0.28.0

func (table *TranslationTable) UpdateWeightsWithSequence(data genbank.Genbank) error

UpdateWeightsWithSequence will look at the coding regions in the given genbank data, and use those to generate new weights for the codons in the translation table. The next time a sequence is optimised, it will use those updated weights.

This can be used to, for example, figure out which DNA sequence is needed to give the best yield of protein when trying to express a protein across different species

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL