gogo

package module
v0.0.0-...-62e55f1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 27, 2021 License: BSD-3-Clause Imports: 9 Imported by: 2

README

gogo

gogo is a package that enables programmatic queries of a Gene Ontology graph using the Gonum graph packages. It currently makes use of features that are not yet merged into the Gonum release branches.

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func ConnectedByAny

func ConnectedByAny(e graph.Edge, with func(*rdf.Statement) bool) bool

ConnectedByAny is a helper function for simplifying graph traversal conditions.

Example (Depth)
package main

import (
	"fmt"
	"strings"

	"gonum.org/v1/gonum/graph"
	"gonum.org/v1/gonum/graph/formats/rdf"
	"gonum.org/v1/gonum/graph/traverse"

	"github.com/kortschak/gogo"
)

func main() {
	// Compute the DAG depth of every GO term. The graph g is assumed
	// to be loaded already; the Graph example shows how to do that.

	type depth struct {
		level int
		root  rdf.Term
	}
	depths := make(map[rdf.Term]depth)

	// isGOSubClass reports whether a statement is acceptable for the
	// walk:
	//  - its predicate must be a subClassOf relationship
	//  - its subject must be an obo:GO term (the subject rather than
	//    the object is checked because the traversal runs in reverse)
	isGOSubClass := func(s *rdf.Statement) bool {
		if s.Predicate.Value != "<rdfs:subClassOf>" {
			return false
		}
		return strings.HasPrefix(s.Subject.Value, "<obo:GO_")
	}

	// Starting at each root in turn, descend the DAG breadth first
	// until no more nodes can be reached.
	for _, root := range g.Roots(false) {
		depths[root] = depth{level: 0, root: root}
		walker := traverse.BreadthFirst{
			Traverse: func(e graph.Edge) bool {
				return gogo.ConnectedByAny(e, isGOSubClass)
			},
		}
		walker.Walk(reverse{g}, root, func(n graph.Node, d int) bool {
			// Record the level and keep walking; returning false
			// never terminates the walk early.
			depths[n.(rdf.Term)] = depth{level: d, root: root}
			return false
		})
	}

	for term, info := range depths {
		fmt.Printf("depth of %s is %d in %s\n", term.Value, info.level, info.root.Value)
	}
}

// reverse wraps a gogo.Graph and implements traverse.Graph with the
// direction of every edge reversed.
type reverse struct {
	*gogo.Graph
}

// From returns the nodes that reach id directly in the wrapped graph.
func (r reverse) From(id int64) graph.Nodes {
	return r.Graph.To(id)
}

// Edge returns the wrapped graph's edge from vid to uid.
func (r reverse) Edge(uid, vid int64) graph.Edge {
	return r.Graph.Edge(vid, uid)
}
Output:

Types

type Descendant

type Descendant struct {
	Term  rdf.Term // the descendant term
	Depth int      // presumably the number of levels between Term and the queried ancestor — TODO confirm
}

Descendant represents a descendancy relationship.

type Graph

type Graph struct {
	// contains filtered or unexported fields
}

Graph implements a Gene Ontology graph.

Example
package main

import (
	"compress/gzip"
	"io"
	"log"
	"os"

	"gonum.org/v1/gonum/graph/formats/rdf"

	"github.com/kortschak/gogo"
)

func main() {
	// Open the gzip-compressed RDF file holding the ontology.
	file, err := os.Open("path/to/go.nt.gz")
	if err != nil {
		log.Fatal(err)
	}
	zr, err := gzip.NewReader(file)
	if err != nil {
		log.Fatal(err)
	}

	// Decode one statement at a time until the input is exhausted.
	var statements []*rdf.Statement
	decoder := rdf.NewDecoder(zr)
	for {
		stmt, err := decoder.Unmarshal()
		if err == io.EOF {
			break
		}
		if err != nil {
			log.Fatalf("error during decoding: %v", err)
		}

		// This is the place to drop statements that are unwanted
		// or irrelevant to the graph being built.
		statements = append(statements, stmt)
	}
	file.Close()

	// Canonicalise blank nodes to reduce memory footprint.
	statements, err = rdf.URDNA2015(statements, statements)
	if err != nil {
		log.Fatal(err)
	}

	ont := gogo.NewGraph()
	for i := range statements {
		ont.AddStatement(statements[i])
	}

	// Do something with the graph.
}
Output:

func NewGraph

func NewGraph() *Graph

NewGraph returns a new empty Graph.

func (*Graph) AddStatement

func (g *Graph) AddStatement(s *rdf.Statement)

AddStatement adds s to the graph. It panics if rdf.Term UIDs in the statement are not consistent with existing terms in the graph. Statements must not be altered while being held by the graph. If the UID fields of the terms in s are zero, they will be set to values consistent with the rest of the graph on return, mutating the parameter, otherwise the UIDs must match terms that already exist in the graph. The statement must be a valid RDF statement otherwise AddStatement will panic. Predicate IRIs must either all be globally namespaced (prefixed with the http scheme) or all use the qualified name prefix, otherwise AddStatement will panic. Subject and object IRIs should match.

func (*Graph) AllStatements

func (g *Graph) AllStatements() *Statements

AllStatements returns an iterator of the statements that make up the graph.

func (*Graph) ClosestCommonAncestor

func (g *Graph) ClosestCommonAncestor(a, b rdf.Term) (r rdf.Term, ok bool)

ClosestCommonAncestor returns the term that is the closest common ancestor of a and b if it exists in g.

Example
package main

import (
	"fmt"
	"log"

	"github.com/kortschak/gogo"
)

var g *gogo.Graph

func main() {
	// Locate the nearest ancestor shared by two terms of the GO DAG.
	const (
		// G protein-coupled bile acid receptor activity.
		aIRI = "<http://purl.obolibrary.org/obo/GO_0038182>"
		// coreceptor activity involved in canonical Wnt signaling pathway.
		bIRI = "<http://purl.obolibrary.org/obo/GO_1904928>"
	)

	a, found := g.TermFor(aIRI)
	if !found {
		log.Fatalf("no node for %v", aIRI)
	}
	b, found := g.TermFor(bIRI)
	if !found {
		log.Fatalf("no node for %v", bIRI)
	}

	// The expected answer is signaling receptor activity,
	// <http://purl.obolibrary.org/obo/GO_0038023>.
	ancestor, found := g.ClosestCommonAncestor(a, b)
	if !found {
		log.Fatal("no common ancestor")
	}
	fmt.Printf("Closest common ancestor of %s and %s is %s", aIRI, bIRI, ancestor.Value)
}
Output:


Closest common ancestor of <http://purl.obolibrary.org/obo/GO_0038182> and <http://purl.obolibrary.org/obo/GO_1904928> is <http://purl.obolibrary.org/obo/GO_0038023>

func (*Graph) DescendantsOf

func (g *Graph) DescendantsOf(t rdf.Term) []Descendant

DescendantsOf returns all of the descendants of the given term.

Example
package main

import (
	"fmt"
	"log"

	"github.com/kortschak/gogo"
)

var g *gogo.Graph

func main() {
	// List every descendant of a term together with its relative
	// distance from that term, which acts as the sub-root.

	// canonical Wnt signaling pathway.
	const iri = "<http://purl.obolibrary.org/obo/GO_0060070>"
	term, found := g.TermFor(iri)
	if !found {
		log.Fatalf("no node for %v", iri)
	}

	descendants := g.DescendantsOf(term)
	for i := range descendants {
		fmt.Printf("%s %d\n", descendants[i].Term.Value, descendants[i].Depth)
	}
}
Output:


<http://purl.obolibrary.org/obo/GO_0003267> 2
<http://purl.obolibrary.org/obo/GO_0044328> 1
<http://purl.obolibrary.org/obo/GO_0044329> 1
<http://purl.obolibrary.org/obo/GO_0044330> 1
<http://purl.obolibrary.org/obo/GO_0044334> 1
<http://purl.obolibrary.org/obo/GO_0044335> 1
<http://purl.obolibrary.org/obo/GO_0044336> 1
<http://purl.obolibrary.org/obo/GO_0044337> 1
<http://purl.obolibrary.org/obo/GO_0044338> 1
<http://purl.obolibrary.org/obo/GO_0044339> 1
<http://purl.obolibrary.org/obo/GO_0044340> 1
<http://purl.obolibrary.org/obo/GO_0060823> 1
<http://purl.obolibrary.org/obo/GO_0060901> 1
<http://purl.obolibrary.org/obo/GO_0061290> 1
<http://purl.obolibrary.org/obo/GO_0061292> 1
<http://purl.obolibrary.org/obo/GO_0061316> 1
<http://purl.obolibrary.org/obo/GO_0100012> 1
<http://purl.obolibrary.org/obo/GO_0100067> 1
<http://purl.obolibrary.org/obo/GO_1904954> 1
<http://purl.obolibrary.org/obo/GO_0003136> 2
<http://purl.obolibrary.org/obo/GO_0044343> 2
<http://purl.obolibrary.org/obo/GO_0061291> 2
<http://purl.obolibrary.org/obo/GO_0061293> 2
<http://purl.obolibrary.org/obo/GO_0061310> 2
<http://purl.obolibrary.org/obo/GO_0061315> 2
<http://purl.obolibrary.org/obo/GO_0061317> 2
<http://purl.obolibrary.org/obo/GO_0061324> 2
<http://purl.obolibrary.org/obo/GO_1905474> 2

func (*Graph) Edge

func (g *Graph) Edge(uid, vid int64) graph.Edge

Edge returns the edge from u to v if such an edge exists and nil otherwise. The node v must be directly reachable from u as defined by the From method. The returned graph.Edge is a multi.Edge if an edge exists.

func (*Graph) Edges

func (g *Graph) Edges() graph.Edges

Edges returns all the edges in the graph. Each edge in the returned slice is a multi.Edge.

func (*Graph) From

func (g *Graph) From(id int64) graph.Nodes

From returns all nodes in g that can be reached directly from n.

The returned graph.Nodes is only valid until the next mutation of the receiver.

func (*Graph) FromSubject

func (g *Graph) FromSubject(t rdf.Term) graph.Nodes

FromSubject returns all nodes in g that can be reached directly from an RDF subject term.

The returned graph.Nodes is only valid until the next mutation of the receiver.

func (*Graph) HasEdgeBetween

func (g *Graph) HasEdgeBetween(xid, yid int64) bool

HasEdgeBetween returns whether an edge exists between nodes x and y without considering direction.

func (*Graph) HasEdgeFromTo

func (g *Graph) HasEdgeFromTo(uid, vid int64) bool

HasEdgeFromTo returns whether an edge exists in the graph from u to v.

func (*Graph) IsDescendantOf

func (g *Graph) IsDescendantOf(a, q rdf.Term) (yes bool, depth int)

IsDescendantOf returns whether the query q is a descendant of a and how many levels separate them if it is. If q is not a descendant of a, depth will be negative.

Example
package main

import (
	"fmt"
	"log"

	"github.com/kortschak/gogo"
)

var g *gogo.Graph

func main() {
	// Test, in both directions, whether one term is a GO descendant
	// of another in the sub-class hierarchy.
	const (
		// coreceptor activity involved in canonical Wnt signaling pathway.
		aIRI = "<http://purl.obolibrary.org/obo/GO_1904928>"
		// signaling receptor activity.
		bIRI = "<http://purl.obolibrary.org/obo/GO_0038023>"

		report = "%s is descendant of %s = %t (%d levels apart)\n"
	)

	a, found := g.TermFor(aIRI)
	if !found {
		log.Fatalf("no node for %v", aIRI)
	}
	b, found := g.TermFor(bIRI)
	if !found {
		log.Fatalf("no node for %v", bIRI)
	}

	// Is b a descendant of a?
	isDesc, levels := g.IsDescendantOf(a, b)
	fmt.Printf(report, bIRI, aIRI, isDesc, levels)

	// Is a a descendant of b?
	isDesc, levels = g.IsDescendantOf(b, a)
	fmt.Printf(report, aIRI, bIRI, isDesc, levels)
}
Output:


<http://purl.obolibrary.org/obo/GO_0038023> is descendant of <http://purl.obolibrary.org/obo/GO_1904928> = false (-1 levels apart)
<http://purl.obolibrary.org/obo/GO_1904928> is descendant of <http://purl.obolibrary.org/obo/GO_0038023> = true (3 levels apart)

func (*Graph) Lines

func (g *Graph) Lines(uid, vid int64) graph.Lines

Lines returns the lines from u to v if any such lines exist and nil otherwise. The node v must be directly reachable from u as defined by the From method.

func (*Graph) Node

func (g *Graph) Node(id int64) graph.Node

Node returns the node with the given ID if it exists in the graph, and nil otherwise.

func (*Graph) Nodes

func (g *Graph) Nodes() graph.Nodes

Nodes returns all the nodes in the graph.

The returned graph.Nodes is only valid until the next mutation of the receiver.

func (*Graph) Predicates

func (g *Graph) Predicates() []rdf.Term

Predicates returns a slice of all the predicates used in the graph.

func (*Graph) Query

func (g *Graph) Query(from ...rdf.Term) Query

Query returns a query of the receiver starting from the given nodes. Queries may not be mixed between distinct graphs.

func (*Graph) RemoveStatement

func (g *Graph) RemoveStatement(s *rdf.Statement)

RemoveStatement removes s from the graph, leaving the terminal nodes if they are part of another statement. If the statement does not exist in g it is a no-op.

func (*Graph) RemoveTerm

func (g *Graph) RemoveTerm(t rdf.Term)

RemoveTerm removes t and any statements referencing t from the graph. If the term is a predicate, all statements with the predicate are removed. If the term does not exist it is a no-op.

func (*Graph) Roots

func (g *Graph) Roots(force bool) []rdf.Term

Roots returns all the roots of the graph. It will first attempt to find roots from the three known roots molecular_function, cellular_component and biological_process in the appropriate namespace and if none can be found, will search from all GO terms for the complete set of roots. If force is true, a complete search will be done.

Example
package main

import (
	"fmt"

	"github.com/kortschak/gogo"
)

var g *gogo.Graph

func main() {
	// Verify that every GO term is rooted in one of the three
	// ontology sources.

	// With force false, only the known roots that are present are
	// returned. When none of them is found, or when force is true,
	// the roots are searched for from all parts of the DAG.
	roots := g.Roots(true)
	for i := range roots {
		fmt.Println(roots[i].Value)
	}
}
Output:


<http://purl.obolibrary.org/obo/GO_0003674>
<http://purl.obolibrary.org/obo/GO_0005575>
<http://purl.obolibrary.org/obo/GO_0008150>

func (*Graph) Statements

func (g *Graph) Statements(uid, vid int64) *Statements

Statements returns an iterator of the statements that connect the subject term node u to the object term node v.

func (*Graph) TermFor

func (g *Graph) TermFor(text string) (term rdf.Term, ok bool)

TermFor returns the rdf.Term for the given text. The text must be an exact match for the rdf.Term's Value field.

func (*Graph) To

func (g *Graph) To(id int64) graph.Nodes

To returns all nodes in g that can reach directly to n.

The returned graph.Nodes is only valid until the next mutation of the receiver.

func (*Graph) ToObject

func (g *Graph) ToObject(t rdf.Term) graph.Nodes

ToObject returns all nodes in g that can reach directly to an RDF object term.

The returned graph.Nodes is only valid until the next mutation of the receiver.

type Query

type Query struct {
	// contains filtered or unexported fields
}

Query represents a step in a graph query.

Example (Annotation)
package main

import (
	"compress/gzip"
	"fmt"
	"io"
	"log"
	"os"
	"strings"

	"gonum.org/v1/gonum/graph/formats/rdf"

	"github.com/kortschak/gogo"
)

// main builds a single graph from the gzip-compressed RDF files named
// on the command line and then, for every <ensembl:...> node, prints a
// <local:annotates> statement for each GO term reachable from that
// gene via one of its transcripts.
func main() {
	g := gogo.NewGraph()
	var dec rdf.Decoder
	// Takes two command line parameters, an N-Triples containing
	// the SO_transcribed_from predicates of homo_sapiens.ttl and
	// an N-Triples containing the <rdfs:seeAlso> <obo:GO_*>
	// statements of homo_sapiens_xrefs.ttl.
	for _, path := range os.Args[1:] {
		f, err := os.Open(path)
		if err != nil {
			log.Fatal(err)
		}
		r, err := gzip.NewReader(f)
		if err != nil {
			log.Fatal(err)
		}

		// The same decoder is reused for each input file.
		dec.Reset(r)
		var statements []*rdf.Statement
		for {
			s, err := dec.Unmarshal()
			if err != nil {
				if err != io.EOF {
					log.Fatalf("error during decoding: %v", err)
				}
				break
			}

			// Zero the UIDs so that AddStatement assigns values that
			// are consistent with the terms already in the graph.
			s.Subject.UID = 0
			s.Predicate.UID = 0
			s.Object.UID = 0
			statements = append(statements, s)
		}
		f.Close()

		for _, s := range statements {
			g.AddStatement(s)
		}
	}

	nodes := g.Nodes()
	for nodes.Next() {
		// Only nodes whose value carries the ensembl prefix are
		// used as query starting points.
		gene := nodes.Node().(rdf.Term)
		if !strings.HasPrefix(gene.Value, "<ensembl:") {
			continue
		}

		// We are emitting directly, so we need to ensure statement
		// uniqueness. A seen per start node is enough for this. If
		// we were adding to another graph, the deduplication could
		// be handled by the destination graph.
		seen := make(map[int64]bool)

		// Get all GO terms reachable from the ENSG via an ENST
		// since that is how the Ensembl GO annotations work.
		terms := g.Query(gene).In(func(s *rdf.Statement) bool {
			// <transcript:Y> <obo:SO_transcribed_from> <ensembl:X> .
			return s.Predicate.Value == "<obo:SO_transcribed_from>"

		}).Out(func(s *rdf.Statement) bool {
			// Reject objects already accepted for this gene so that
			// each GO term is emitted at most once.
			if seen[s.Object.UID] {
				return false
			}

			// <transcript:Y> <rdfs:seeAlso> <obo:GO_Z> .
			ok := s.Predicate.Value == "<rdfs:seeAlso>" &&
				strings.HasPrefix(s.Object.Value, "<obo:GO_")
			if ok {
				// The filter records accepted objects as a side
				// effect of being evaluated.
				seen[s.Object.UID] = true
			}
			return ok

		}).Result()

		// Emit one annotation statement per GO term, with the GO term
		// as subject and the gene as object.
		for _, t := range terms {
			fmt.Println(&rdf.Statement{
				Subject:   rdf.Term{Value: t.Value},
				Predicate: rdf.Term{Value: "<local:annotates>"},
				Object:    rdf.Term{Value: gene.Value},
			})
		}
	}
}
Output:

func (Query) And

func (q Query) And(p Query) Query

And returns a query that holds the conjunction of q and p.

func (Query) In

func (q Query) In(fn func(s *rdf.Statement) bool) Query

In returns a query holding nodes reachable in from the receiver's starting nodes via statements that satisfy fn.

func (Query) Not

func (q Query) Not(p Query) Query

Not returns a query that holds q less p.

func (Query) Or

func (q Query) Or(p Query) Query

Or returns a query that holds the disjunction of q and p.

func (Query) Out

func (q Query) Out(fn func(s *rdf.Statement) bool) Query

Out returns a query holding nodes reachable out from the receiver's starting nodes via statements that satisfy fn.

func (Query) Result

func (q Query) Result() []rdf.Term

Result returns the terms held by the query.

func (Query) Unique

func (q Query) Unique() Query

Unique returns a copy of the receiver that contains only one instance of each term.

type Statements

type Statements struct {
	// contains filtered or unexported fields
}

Statements is an RDF statement iterator.

func (*Statements) Next

func (s *Statements) Next() bool

Next returns whether the iterator holds any additional statements.

func (*Statements) Statement

func (s *Statements) Statement() *rdf.Statement

Statement returns the current statement.

Directories

Path Synopsis
internal
set
Package set provides integer and graph.Node sets.
Package set provides integer and graph.Node sets.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL