gfa

package module
v0.0.0-...-05c9395 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 2, 2019 License: MIT Imports: 14 Imported by: 2

README

gfa

a Go library for working with Graphical Fragment Assembly format


travis GoDoc goreportcard codecov

Overview

This is a Go library for working with the Graphical Fragment Assembly (GFA) format.

The purpose of the GFA format is to capture sequence graphs as the product of an assembly, a representation of variation in genomes, splice graphs in genes, or even overlap between reads from long-read sequencing technology.

Read the GFA spec here.

Current limitations:

  • restricted to GFA version 1
  • does not handle the containment field
  • validation is limited

Installation

go get github.com/will-rowe/gfa

Example usage

convert an MSA file to a GFA file

package main

import (
	"log"
	"os"

	"github.com/will-rowe/gfa"
)

// inputFile is the path of the MSA file that this example reads and converts to GFA.
var inputFile = "./example.msa"

// main reads the MSA at inputFile, converts it to a GFA instance and writes
// the result to ./example.gfa. Any failure along the way is fatal.
func main() {
	// open the MSA; the conversion below cannot proceed without it
	msa, err := gfa.ReadMSA(inputFile)
	if err != nil {
		log.Fatal(err)
	}

	// convert the MSA to a GFA instance
	myGFA, err := gfa.MSA2GFA(msa)
	if err != nil {
		log.Fatal(err)
	}

	// create the output file, checking the error before deferring Close so
	// that a failed Create is reported instead of being silently overwritten
	outfile, err := os.Create("./example.gfa")
	if err != nil {
		log.Fatal(err)
	}
	defer outfile.Close()

	// create a gfaWriter
	writer, err := gfa.NewWriter(outfile, myGFA)
	if err != nil {
		log.Fatal(err)
	}

	// write the GFA content
	if err := myGFA.WriteGFAContent(writer); err != nil {
		log.Fatal(err)
	}
}

process a GFA file line by line

package main

import (
	"flag"
	"io"
	"log"
	"os"

	"github.com/will-rowe/gfa"
)

// inputFile is the -inputFile command-line flag; it defaults to the empty
// string, which main treats as "read from STDIN".
var inputFile = flag.String(
	"inputFile",
	"",
	"input GFA file (empty for STDIN)",
)

// main streams a GFA file (from the -inputFile flag, or STDIN when the flag
// is empty), logs its header and comment lines, then logs every remaining
// line and adds it to the GFA instance held by the reader.
func main() {
	flag.Parse()
	var r io.Reader

	// open file stream and close it when finished
	if *inputFile == "" {
		r = os.Stdin
	} else {
		fh, err := os.Open(*inputFile)
		if err != nil {
			// report both the offending path and the underlying error
			log.Fatalf("could not open file %q: %v", *inputFile, err)
		}
		defer fh.Close()
		r = fh
	}

	// create a GFA reader
	reader, err := gfa.NewReader(r)
	if err != nil {
		// Fatalf (not Fatal) so that the %v verb is actually expanded
		log.Fatalf("can't read gfa file: %v", err)
	}

	// collect the GFA instance
	myGFA := reader.CollectGFA()

	// check version and print the header / comment lines
	if myGFA.GetVersion() != 1 {
		log.Fatal("gfa file is not in version 1 format")
	}
	log.Println(myGFA.PrintHeader())
	if comments := myGFA.PrintComments(); comments != "" {
		log.Println(comments)
	}

	// read the GFA file
	for {
		line, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("error reading line in gfa file: %v", err)
		}

		// each line produced by Read() satisfies the gfaLine interface
		formattedLine := line.PrintGFAline()
		log.Printf("gfa line: %v", formattedLine)

		// you can also add the line to the GFA instance
		if err := line.Add(myGFA); err != nil {
			log.Fatalf("error adding line to GFA instance: %v", err)
		}
	}
}

Documentation

Overview

Package gfa is a Go library for working with the Graphical Fragment Assembly (GFA) format.

For more information:

GFA Format Specification
https://github.com/GFA-spec/GFA-spec

This package currently only conforms to GFA1 spec.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func NewLink(from, fOrient, to, tOrient, overlap []byte) (*link, error)

NewLink is a link constructor

func NewOptionalFields

func NewOptionalFields(optional ...[]byte) (*optionalFields, error)

NewOptionalFields is an optionalFields constructor

func NewPath

func NewPath(n []byte, segs, olaps [][]byte) (*path, error)

NewPath is a path constructor

func NewSegment

func NewSegment(n, seq []byte) (*segment, error)

NewSegment is a segment constructor

func ReadMSA

func ReadMSA(fileName string) (*multi.Multi, error)

ReadMSA will read in an MSA file and store it as a Multi (MSA)

Types

type GFA

type GFA struct {
	// contains filtered or unexported fields
}

The GFA type holds all the information from a GFA formatted file

func MSA2GFA

func MSA2GFA(msa *multi.Multi) (*GFA, error)

MSA2GFA converts an MSA to a GFA instance

func NewGFA

func NewGFA() *GFA

NewGFA returns a new GFA instance

func (*GFA) AddComment

func (gfa *GFA) AddComment(c []byte)

AddComment appends a comment to the comments held by the GFA instance

func (*GFA) AddVersion

func (gfa *GFA) AddVersion(v int) error

AddVersion adds the GFA format version to the GFA instance

func (gfa *GFA) GetLinks() ([]*link, error)

GetLinks returns a slice of all the links held in the GFA instance

func (*GFA) GetPaths

func (gfa *GFA) GetPaths() ([]*path, error)

GetPaths returns a slice of all the paths held in the GFA instance

func (*GFA) GetSegments

func (gfa *GFA) GetSegments() ([]*segment, error)

GetSegments returns a slice of all the segments held in the GFA instance

func (*GFA) GetVersion

func (gfa *GFA) GetVersion() int

GetVersion returns the GFA version

// a return value of 0 indicates no version set

func (*GFA) MarshalHeader

func (gfa *GFA) MarshalHeader() []byte

MarshalHeader prepares the header/comment lines for a writer

func (*GFA) PrintComments

func (gfa *GFA) PrintComments() string

PrintComments prints a string of GFA formatted comment line(s)

func (*GFA) PrintHeader

func (gfa *GFA) PrintHeader() string

PrintHeader prints the GFA formatted header line

func (*GFA) PrintSequence

func (gfa *GFA) PrintSequence(pathName []byte) ([]byte, error)

PrintSequence will return the sequence encoded by a specified pathName

func (*GFA) Validate

func (gfa *GFA) Validate() error

Validate performs several checks on the GFA instance. TODO: add more checks.

// checks that it contains a version (1/2)

// checks that it contains 1 or more segments

func (*GFA) WriteGFAContent

func (gfa *GFA) WriteGFAContent(w *GFAwriter) error

WriteGFAContent will dump the content of a GFA instance to file

type GFAwriter

type GFAwriter struct {
	// contains filtered or unexported fields
}

GFAwriter implements GFA format writing

func NewWriter

func NewWriter(w io.Writer, myGFA *GFA) (*GFAwriter, error)

NewWriter returns a Writer to the given io.Writer

func (*GFAwriter) Write

func (myWriter *GFAwriter) Write(line gfaLine) error

Write writes a line to the GFA stream

type Reader

type Reader struct {
	// contains filtered or unexported fields
}

Reader implements GFA format reading.

func NewReader

func NewReader(r io.Reader) (*Reader, error)

NewReader returns a new Reader, reading from the given io.Reader

func (*Reader) CollectGFA

func (r *Reader) CollectGFA() *GFA

CollectGFA returns the GFA instance held by the reader

func (*Reader) Read

func (r *Reader) Read() (gfaLine, error)

Read returns the next (non H/#) GFA line from the reader

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL