xsel

package module
v0.9.15 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 25, 2024 License: MIT Imports: 6 Imported by: 0

README

xsel

Donate Go Reference

xsel is a library that (almost) implements the XPath 1.0 specification. The non-compliant bits are:

  • xsel does not implement the id function.
  • The grammar as defined in the XPath 1.0 spec doesn't explicitly allow function calls in the middle of a path expression, such as /path/function-call()/path. xsel allows function calls in the middle of path expressions.
  • xsel allows name lookups with a wildcard for the namespace, such as /*:path.
  • xsel allows the # character in element selections.

Basic usage

package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main parses a small XML document, evaluates the XPath expression /root/a
// against it, and prints the result.
func main() {
	xml := `
<root>
	<a>This is an XML node.</a>
</root>
`

	// MustBuildExpr panics on an invalid expression, which is acceptable for
	// a hard-coded query.
	xpath := xsel.MustBuildExpr(`/root/a`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: This is an XML node.
}

Binding namespaces

package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main binds the prefix "ns" to the document's default namespace with
// xsel.WithNS so that the query /ns:root/ns:a can select namespaced elements.
func main() {
	xml := `
<root xmlns="http://some.namespace.com">
	<a xmlns="http://some.namespace.com">This is an XML node with a namespace prefix.</a>
</root>
`

	xpath := xsel.MustBuildExpr(`/ns:root/ns:a`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath, xsel.WithNS("ns", "http://some.namespace.com"))
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: This is an XML node with a namespace prefix.
}

Binding variables

package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main binds the namespaced variable $ns:mynum to the number 3.14 and selects
// the nodes whose value equals it.
func main() {
	xml := `
<root>
	<node>2.50</node>
	<node>3.14</node>
	<node>0.30</node>
</root>
`

	const NS = "http://some.namespace.com"

	xpath := xsel.MustBuildExpr(`//node()[. = $ns:mynum]`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	// The "ns" prefix used in the query must be bound to the same namespace
	// URI that the variable is registered under.
	result, err := xsel.Exec(
		cursor,
		&xpath,
		xsel.WithNS("ns", NS),
		xsel.WithVariableNS(NS, "mynum", xsel.Number(3.14)),
	)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: 3.14
}

Binding custom functions

package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main registers a custom XPath function named is-comment and uses it as a
// predicate to select the comment node of the document.
func main() {
	xml := `
<root>
	<a>This is an element.</a>
	<!-- This is a comment. -->
</root>
`

	// isComment reports whether the first node of the context result is a
	// comment. It reports false when the context result is not a NodeSet or
	// the NodeSet is empty. The args are unused.
	isComment := func(context xsel.Context, args ...xsel.Result) (xsel.Result, error) {
		nodeSet, ok := context.Result().(xsel.NodeSet)
		if !ok || len(nodeSet) == 0 {
			return xsel.Bool(false), nil
		}

		_, ok = nodeSet[0].Node().(xsel.Comment)
		return xsel.Bool(ok), nil
	}

	xpath := xsel.MustBuildExpr(`//node()[is-comment()]`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath, xsel.WithFunction("is-comment", isComment))
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: This is a comment.
}

Unmarshal result into a struct

package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main selects the /NS:Root element of a namespaced XML document and
// unmarshals it into nested structs whose fields carry "xsel" tags holding
// XPath expressions.
func main() {
	xml := `
<Root xmlns="http://www.adventure-works.com">
	<Customers>
		<Customer CustomerID="GREAL">
			<CompanyName>Great Lakes Food Market</CompanyName>
			<ContactName>Howard Snyder</ContactName>
			<ContactTitle>Marketing Manager</ContactTitle>
			<FullAddress>
				<Address>2732 Baker Blvd.</Address>
				<City>Eugene</City>
				<Region>OR</Region>
			</FullAddress>
		</Customer>
		<Customer CustomerID="HUNGC">
		  <CompanyName>Hungry Coyote Import Store</CompanyName>
		  <ContactName>Yoshi Latimer</ContactName>
		  <FullAddress>
			<Address>City Center Plaza 516 Main St.</Address>
			<City>Walla Walla</City>
			<Region>WA</Region>
		  </FullAddress>
		</Customer>
	</Customers>
</Root>
`

	type Address struct {
		Address string `xsel:"NS:Address"`
		City    string `xsel:"NS:City"`
		Region  string `xsel:"NS:Region"`
	}

	type Customer struct {
		Id          string  `xsel:"@CustomerID"`
		Name        string  `xsel:"NS:CompanyName"`
		ContactName string  `xsel:"NS:ContactName"`
		Address     Address `xsel:"NS:FullAddress"`
	}

	type Customers struct {
		Customers []Customer `xsel:"NS:Customers/NS:Customer"`
	}

	// The same namespace binding is passed to both Exec and Unmarshal, since
	// the struct tags use the "NS" prefix as well.
	contextSettings := xsel.WithNS("NS", "http://www.adventure-works.com")
	xpath := xsel.MustBuildExpr(`/NS:Root`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath, contextSettings)
	if err != nil {
		panic(err)
	}

	customers := Customers{}
	if err := xsel.Unmarshal(result, &customers, contextSettings); err != nil {
		panic(err)
	}

	fmt.Printf("%+v\n", customers)
	// Output: {Customers:[{Id:GREAL Name:Great Lakes Food Market ContactName:Howard Snyder Address:{Address:2732 Baker Blvd. City:Eugene Region:OR}} {Id:HUNGC Name:Hungry Coyote Import Store ContactName:Yoshi Latimer Address:{Address:City Center Plaza 516 Main St. City:Walla Walla Region:WA}}]}
}

Extensible

xsel supplies an XML parser (using the encoding/xml package) out of the box, but the XPath logic does not depend directly on XML. It instead depends on the interfaces defined in the node and store packages. This means it's possible to use xsel for querying against non-XML documents. The parser package supplies methods for parsing XML, HTML, and JSON documents.

To build a custom document, implement your own Parser method, and build Elements, Attributes, Character Data, Comments, Processing Instructions, and Namespaces.

HTML documents

Use the xsel.ReadHtml function to read HTML documents. Namespaces are completely ignored for HTML documents. Keep all queries in the default namespace. Write queries such as //svg. Do not write queries such as //svg:svg.

JSON documents

JSON documents only build elements and character data. Object declarations will emit an element node with the name #obj. Likewise, array declarations emit an element node with the name #arr. So for example, given the following JSON file:

{
	"states": ["AK", ["MD", "FL"] ]
}

It would look like this in XML...

<#obj>
	<states>
		<#arr>
			AK
			<#arr>
				MD
				FL
			</#arr>
		</#arr>
	</states>
</#obj>

... however, MD and FL are separate text nodes, which is different from XML parsing:

package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main parses a JSON document and runs two XPath queries against the same
// cursor: one selecting the text of the outer array, and one selecting the
// second text node of the nested array.
func main() {
	json := `
{
	"states": ["AK", ["MD", "FL"] ]
}
`

	xpath := xsel.MustBuildExpr(`/#obj/states/#arr/text()`)

	cursor, err := xsel.ReadJson(bytes.NewBufferString(json))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)

	// Notice the [2] in the text selection.
	xpath = xsel.MustBuildExpr(`/#obj/states/#arr/#arr/text()[2]`)

	result, err = xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
	// Output: AK
	// FL
}

Commandline Utility

xsel supplies a grep-like commandline utility for querying XML documents:

$ go install github.com/ChrisTrenkamp/xsel/xsel@latest
$ xsel -h
Usage of xsel:
  -a    If the result is a NodeSet, print the string value of all the nodes instead of just the first
  -c int
        Run queries in the given number of concurrent workers (beware that results will have no predictable order) (default 1)
  -e value
        Bind an entity value e.g. entityname=entityval
  -m    If the result is a NodeSet, print all the results as XML
  -n    Suppress filenames
  -r    Recursively traverse directories
  -s value
        Namespace mapping. e.g. -ns companyns=http://company.com
  -t string
        Force xsel to parse files as the given type.  Can be 'xml', 'html', or 'json'.  If unspecified, the file will be detected by its MIME type.  Must be specified when reading from stdin.
  -u    Turns off strict XML decoding
  -v value
        Bind a variable (all variables are bound as string types) e.g. -v var=value or -v companyns:var=value
  -x string
        XPath expression to execute (required)

CLI examples

$ cat test.xml
<?xml version="1.0" encoding="UTF-8"?>
<root>
  <a xmlns="http://a">Element a</a>
  <b>Element b</b>
</root>

This is a basic query:

$ xsel -x '/root/b' test.xml
test.xml: Element b

This is a basic query on stdin:

$ cat test.xml | xsel -x '/root/b' -
Element b

This query has multiple results, but only the first value is printed:

$ xsel -x '/root/*' test.xml
test.xml: Element a

This query has multiple results, and all values are printed:

$ xsel -x '/root/*' -a test.xml
test.xml: Element a
test.xml: Element b

Print all results as XML:

$ xsel -x '/root/*' -m test.xml
test.xml: <a xmlns="http://a">Element a</a>
test.xml: <b>Element b</b>

Suppress the filename when printing results:

$ xsel -x '/root/*' -m -n test.xml
<a xmlns="http://a">Element a</a>
<b>Element b</b>

Bind a namespace:

$ xsel -x '//a:*' -s a='http://a' -m test.xml
test.xml: <a xmlns="http://a">Element a</a>

Bind a variable (variables are bound as strings):

$ xsel -x '//*[. = $textval]' -v textval="Element b" test.xml
test.xml: Element b

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func Unmarshal added in v0.9.13

func Unmarshal(result Result, value any, settings ...ContextApply) error

Unmarshal maps an XPath result to a struct or slice. When unmarshaling a slice, the result must be a NodeSet. When unmarshaling a struct, the result must be a NodeSet with one result. To unmarshal a value to a struct field, give it an "xsel" tag name, and an XPath expression for its value (e.g. `xsel:"//my-struct[@my-id = 'my-value']"`).

For struct fields, Unmarshal can set fields that are ints and uints, bools, strings, slices, and nested structs.

For slice elements, Unmarshal can set ints and uints, bools, strings, and structs. It cannot Unmarshal multidimensional slices.

Arrays, maps, and channels are not supported.

Example
package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main selects the /NS:Root element of a namespaced XML document and
// unmarshals it into nested structs whose fields carry "xsel" tags holding
// XPath expressions.
func main() {
	xml := `
<Root xmlns="http://www.adventure-works.com">
	<Customers>
		<Customer CustomerID="GREAL">
			<CompanyName>Great Lakes Food Market</CompanyName>
			<ContactName>Howard Snyder</ContactName>
			<ContactTitle>Marketing Manager</ContactTitle>
			<FullAddress>
				<Address>2732 Baker Blvd.</Address>
				<City>Eugene</City>
				<Region>OR</Region>
			</FullAddress>
		</Customer>
		<Customer CustomerID="HUNGC">
		  <CompanyName>Hungry Coyote Import Store</CompanyName>
		  <ContactName>Yoshi Latimer</ContactName>
		  <FullAddress>
			<Address>City Center Plaza 516 Main St.</Address>
			<City>Walla Walla</City>
			<Region>WA</Region>
		  </FullAddress>
		</Customer>
	</Customers>
</Root>
`

	type Address struct {
		Address string `xsel:"NS:Address"`
		City    string `xsel:"NS:City"`
		Region  string `xsel:"NS:Region"`
	}

	type Customer struct {
		Id          string  `xsel:"@CustomerID"`
		Name        string  `xsel:"NS:CompanyName"`
		ContactName string  `xsel:"NS:ContactName"`
		Address     Address `xsel:"NS:FullAddress"`
	}

	type Customers struct {
		Customers []Customer `xsel:"NS:Customers/NS:Customer"`
	}

	// The same namespace binding is passed to both Exec and Unmarshal, since
	// the struct tags use the "NS" prefix as well.
	contextSettings := xsel.WithNS("NS", "http://www.adventure-works.com")
	xpath := xsel.MustBuildExpr(`/NS:Root`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath, contextSettings)
	if err != nil {
		panic(err)
	}

	customers := Customers{}
	if err := xsel.Unmarshal(result, &customers, contextSettings); err != nil {
		panic(err)
	}

	fmt.Printf("%+v\n", customers)
}
Output:

{Customers:[{Id:GREAL Name:Great Lakes Food Market ContactName:Howard Snyder Address:{Address:2732 Baker Blvd. City:Eugene Region:OR}} {Id:HUNGC Name:Hungry Coyote Import Store ContactName:Yoshi Latimer Address:{Address:City Center Plaza 516 Main St. City:Walla Walla Region:WA}}]}

func WithFunction added in v0.9.13

func WithFunction(local string, fn Function) func(c *ContextSettings)

WithFunction binds a custom function name with no namespace to an XPath query.

Example
package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main registers a custom XPath function named is-comment and uses it as a
// predicate to select the comment node of the document.
func main() {
	xml := `
<root>
	<a>This is an element.</a>
	<!-- This is a comment. -->
</root>
`

	// isComment reports whether the first node of the context result is a
	// comment. It reports false when the context result is not a NodeSet or
	// the NodeSet is empty. The args are unused.
	isComment := func(context xsel.Context, args ...xsel.Result) (xsel.Result, error) {
		nodeSet, ok := context.Result().(xsel.NodeSet)
		if !ok || len(nodeSet) == 0 {
			return xsel.Bool(false), nil
		}

		_, ok = nodeSet[0].Node().(xsel.Comment)
		return xsel.Bool(ok), nil
	}

	xpath := xsel.MustBuildExpr(`//node()[is-comment()]`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath, xsel.WithFunction("is-comment", isComment))
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
}
Output:

This is a comment.

func WithFunctionNS added in v0.9.13

func WithFunctionNS(space, local string, fn Function) func(c *ContextSettings)

WithFunctionNS binds a custom function name with a namespace to an XPath query.

func WithFunctionName added in v0.9.13

func WithFunctionName(name XmlName, fn Function) func(c *ContextSettings)

WithFunctionName binds a custom function name with a namespace to an XPath query.

func WithNS added in v0.9.13

func WithNS(name, url string) func(c *ContextSettings)

WithNS binds a namespace name to an XPath query.

Example
package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main binds the prefix "ns" to the document's default namespace with
// xsel.WithNS so that the query /ns:root/ns:a can select namespaced elements.
func main() {
	xml := `
<root xmlns="http://some.namespace.com">
	<a xmlns="http://some.namespace.com">This is an XML node with a namespace prefix.</a>
</root>
`

	xpath := xsel.MustBuildExpr(`/ns:root/ns:a`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath, xsel.WithNS("ns", "http://some.namespace.com"))
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
}
Output:

This is an XML node with a namespace prefix.

func WithVariable added in v0.9.13

func WithVariable(local string, value Result) func(c *ContextSettings)

WithVariable binds a variable name with no namespace to an XPath query.

func WithVariableNS added in v0.9.13

func WithVariableNS(space, local string, value Result) func(c *ContextSettings)

WithVariableNS binds a variable name with a namespace to an XPath query.

Example
package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main binds the namespaced variable $ns:mynum to the number 3.14 and selects
// the nodes whose value equals it.
func main() {
	xml := `
<root>
	<node>2.50</node>
	<node>3.14</node>
	<node>0.30</node>
</root>
`

	const NS = "http://some.namespace.com"

	xpath := xsel.MustBuildExpr(`//node()[. = $ns:mynum]`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	// The "ns" prefix used in the query must be bound to the same namespace
	// URI that the variable is registered under.
	result, err := xsel.Exec(
		cursor,
		&xpath,
		xsel.WithNS("ns", NS),
		xsel.WithVariableNS(NS, "mynum", xsel.Number(3.14)),
	)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
}
Output:

3.14

func WithVariableName added in v0.9.13

func WithVariableName(name XmlName, value Result) func(c *ContextSettings)

WithVariableName binds a variable name with a namespace to an XPath query.

Types

type Attribute

type Attribute = node.Attribute

type Bool added in v0.9.13

type Bool = exec.Bool

type CharData

type CharData = node.CharData

type Comment

type Comment = node.Comment

type Context added in v0.9.13

type Context = exec.Context

type ContextApply added in v0.9.13

type ContextApply = exec.ContextApply

type ContextSettings added in v0.9.13

type ContextSettings = exec.ContextSettings

type Cursor

type Cursor = store.Cursor

func ReadHtml added in v0.9.13

func ReadHtml(in io.Reader) (Cursor, error)

ReadHtml parses the given HTML document and stores the node in memory.

func ReadJson added in v0.9.13

func ReadJson(in io.Reader) (Cursor, error)

ReadJson parses the given JSON document and stores the node in memory.

Example
package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main parses a JSON document and runs two XPath queries against the same
// cursor: one selecting the text of the outer array, and one selecting the
// second text node of the nested array.
func main() {
	json := `
{
	"states": ["AK", ["MD", "FL"] ]
}
`

	xpath := xsel.MustBuildExpr(`/#obj/states/#arr/text()`)

	cursor, err := xsel.ReadJson(bytes.NewBufferString(json))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)

	// The [2] predicate picks the second text node of the nested array.
	xpath = xsel.MustBuildExpr(`/#obj/states/#arr/#arr/text()[2]`)

	result, err = xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
}
Output:

AK
FL

func ReadXml added in v0.9.13

func ReadXml(in io.Reader, opts ...XmlParseOptions) (Cursor, error)

ReadXml parses the given XML document and stores the node in memory.

type Element

type Element = node.Element

type Function added in v0.9.13

type Function = exec.Function

type Grammar added in v0.9.13

type Grammar = grammar.Grammar

func BuildExpr added in v0.9.13

func BuildExpr(xpath string) (Grammar, error)

BuildExpr creates an XPath query.

func MustBuildExpr added in v0.9.13

func MustBuildExpr(xpath string) Grammar

MustBuildExpr is like BuildExpr, but panics if BuildExpr returns an error.

type NamedNode added in v0.9.13

type NamedNode = node.NamedNode

type Namespace

type Namespace = node.Namespace

type Node

type Node = node.Node

type NodeSet

type NodeSet = exec.NodeSet

type Number added in v0.9.13

type Number = exec.Number

type Parser added in v0.9.13

type Parser = parser.Parser

type ProcInst

type ProcInst = node.ProcInst

type Result added in v0.9.13

type Result = exec.Result

func Exec added in v0.9.13

func Exec(cursor Cursor, expr *Grammar, settings ...ContextApply) (Result, error)

Exec executes an XPath query against the given Cursor and returns the result.

Example
package main

import (
	"bytes"
	"fmt"

	"github.com/ChrisTrenkamp/xsel"
)

// main parses a small XML document, evaluates the XPath expression /root/a
// against it, and prints the result.
func main() {
	xml := `
<root>
	<a>This is an XML node.</a>
</root>
`

	// MustBuildExpr panics on an invalid expression, which is acceptable for
	// a hard-coded query.
	xpath := xsel.MustBuildExpr(`/root/a`)

	cursor, err := xsel.ReadXml(bytes.NewBufferString(xml))
	if err != nil {
		panic(err)
	}

	result, err := xsel.Exec(cursor, &xpath)
	if err != nil {
		panic(err)
	}

	fmt.Println(result)
}
Output:

This is an XML node.

type Root

type Root = node.Root

type String added in v0.9.13

type String = exec.String

type XmlName added in v0.9.13

type XmlName = exec.XmlName

func GetQName added in v0.9.13

func GetQName(input string, namespaces map[string]string) (XmlName, error)

type XmlParseOptions added in v0.9.13

type XmlParseOptions = parser.XmlParseOptions

Directories

Path Synopsis
lexer
Package lexer is generated by GoGLL.
Package lexer is generated by GoGLL.
parser
Package parser is generated by gogll.
Package parser is generated by gogll.
parser/bsr
Package bsr implements a Binary Subtree Representation set as defined in
Package bsr implements a Binary Subtree Representation set as defined in
parser/slot
Package slot is generated by gogll.
Package slot is generated by gogll.
parser/symbols
Package symbols is generated by gogll.
Package symbols is generated by gogll.
sppf
Package sppf implements a Shared Packed Parse Forest as defined in:
Package sppf implements a Shared Packed Parse Forest as defined in:
token
Package token is generated by GoGLL.
Package token is generated by GoGLL.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL