saihon

package module
v1.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 13, 2023 License: MIT Imports: 8 Imported by: 0

README

saihon

JavaScript-like HTML parser for Go language.


GoDoc Test



Usage


import (
    "strings"

    "github.com/saihon/saihon"
)

// main walks through the common operations of the package: parsing a
// document, looking up single elements and collections, iterating a
// collection, and reading or writing content and attributes.
func main() {
    text := "<html><head></head><body></body></html>"

    // parse from text HTML
    document, err := saihon.Parse(strings.NewReader(text))
    if err != nil {
        return
    }

    documentElement := document.DocumentElement()
    all     := document.All()
    body    := document.Body()
    title   := document.Title() // title string
    head    := document.Head()
    form    := document.Form()
    images  := document.Images()
    links   := document.Links()
    anchors := document.Anchors()


    element := document.GetElementById("id")
    element = document.QuerySelector("div > p")
    // should be verified
    if element != nil {
        textcontent := element.TextContent()
        // ...
    }


    // returns collection
    elements := document.GetElementsByClassName("class")
    elements = document.QuerySelectorAll("div > p")
    elements = document.GetElementsByName("name")
    elements = document.GetElementsByTagName("p")

    // each element
    for i := 0; i < elements.Length(); i++ {
        outerhtml := elements.Get(i).OuterHTML()
        // ...
    }
    // or
    for element := range elements.Enumerator() {
        outerhtml := element.OuterHTML()
        // ...
    }


    // The single-element lookups return nil when nothing matches, and the
    // methods below have value receivers, so calling them through a nil
    // *Element would panic. Stop here if there is no element to work on.
    if element == nil {
        return
    }

    // set
    element.TextContent("hello")
    // get
    textcontent := element.TextContent()
    // set
    element.InnerHTML("<p>hello</p>")
    // get
    innerhtml := element.InnerHTML()

    // get id (GetAttribute returns the value; HasAttribute only reports presence)
    id := element.GetAttribute("id")
    // get class name
    classname := element.GetAttribute("class")
    // set attribute
    element.SetAttribute("key", "value")
    // remove
    element.RemoveAttribute("key")
}



License

MIT License



Documentation

Overview

Example
package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/saihon/saihon"
	"github.com/saihon/saihon/utils"
)

// main demonstrates a typical session: parse a document, read and modify an
// element, remove and create nodes, then iterate a collection in two ways.
func main() {
	const source = `
<html>
<head></head>
<body>
	<div id="id">hello</div>
</body>
</html>`

	doc, parseErr := saihon.Parse(strings.NewReader(source))
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	// Look up the <div>; nothing else in the example makes sense without it.
	target := doc.GetElementById("id")
	if target == nil {
		return
	}
	fmt.Println(target.TextContent()) // hello

	// Attribute: write it, then read it back.
	target.SetAttribute("class", "class-1")
	fmt.Println(target.GetAttribute("class")) // class-1

	// Detach the <div> from <body>.
	body := doc.Body()
	body.RemoveChild(target)

	// Build <div>hello world</div> and attach it to <body>.
	box := saihon.CreateElement("div")
	box.AppendChild(saihon.CreateTextNode("hello world"))
	body.AppendChild(box)

	// An element can also remove itself.
	box.Remove()

	// Append one <p> per word.
	words := []string{"foo", "bar", "baz"}
	for _, word := range words {
		paragraph := saihon.CreateElement("p")
		paragraph.AppendChild(saihon.CreateTextNode(word))
		body.AppendChild(paragraph)
	}

	paragraphs := body.GetElementsByTagName("p")

	// Index-based iteration.
	for i := 0; i < paragraphs.Length(); i++ {
		fmt.Println(paragraphs.Get(i).InnerText())
	}

	// Channel-based iteration.
	for item := range paragraphs.Enumerator() {
		fmt.Println(item.OuterHTML())
	}

	utils.Empty(body.Node)
}
Output:

Index

Examples

Constants

View Source
// Insertion positions, typed as Position, for the InsertAdjacent* methods.
// Each constant converts the same-named value of the utils package.
// Relative to a target element <p>, the positions are:
//
//	<!-- beforebegin --> <p> <!-- afterbegin --> childnodes <!-- beforeend --> </p> <!-- afterend -->
const (
	// Beforebegin is the position in front of the element's opening tag.
	Beforebegin = Position(utils.Beforebegin)
	// Afterbegin is the position inside the element, before its child nodes.
	Afterbegin  = Position(utils.Afterbegin)
	// Beforeend is the position inside the element, after its child nodes.
	Beforeend   = Position(utils.Beforeend)
	// Afterend is the position behind the element's closing tag.
	Afterend    = Position(utils.Afterend)
)

Variables

This section is empty.

Functions

This section is empty.

Types

type Collection

// Collection is a list of nodes, as returned by the multi-result lookups such
// as GetElementsByTagName and QuerySelectorAll. Visit its entries as *Element
// with Length and Get, or by ranging over Enumerator.
type Collection struct {
	// Nodes holds the underlying html nodes of the collection.
	Nodes []*html.Node
}

Collection

func (Collection) Enumerator

func (e Collection) Enumerator() chan *Element

Enumerator returns a channel that can be used with for..range, e.g. for element := range elements.Enumerator() { ... }

func (Collection) Get

func (e Collection) Get(index int) *Element

Get returns the "*Element" given index

func (Collection) Length

func (e Collection) Length() int

Length

type DOMTokenList

// DOMTokenList is the value returned by Element.ClassList.
type DOMTokenList struct {
	// List is presumably the individual tokens (class names) — TODO confirm
	// against the implementation.
	List  []string
	// Value is presumably the raw attribute string the tokens were taken
	// from — TODO confirm against the implementation.
	Value string
}

DOMTokenList

func (DOMTokenList) Length

func (t DOMTokenList) Length() int

Length

type Document

// Document is the root handle of a parsed HTML document. It is declared with
// Element as its underlying type; obtain one from Parse.
type Document Element

func Parse

func Parse(r io.Reader) (*Document, error)

Parse parses HTML from an io.Reader

Example (Bytes)

This example shows how to use parse from bytes

package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/saihon/saihon"
)

// main parses an HTML document held in a byte slice and prints its title.
func main() {
	src := []byte(`<html><head></head><body></body></html>`)
	reader := bytes.NewReader(src)

	doc, parseErr := saihon.Parse(reader)
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	title := doc.Title()
	fmt.Println(title)
}
Output:

Example (File)

This example shows how to use parse from file

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/saihon/saihon"
)

// main opens index.html, parses it and prints the document title.
func main() {
	file, openErr := os.Open("index.html")
	if openErr != nil {
		log.Fatal(openErr)
	}
	// The file is only needed while parsing; release it when main returns.
	defer file.Close()

	doc, parseErr := saihon.Parse(file)
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	title := doc.Title()
	fmt.Println(title)
}
Output:

Example (Httpresponse)

This example shows how to use parse from http response

package main

import (
	"fmt"
	"log"
	"net/http"

	"github.com/saihon/saihon"
)

// main fetches a page over HTTP, parses the response body and prints the
// document title.
func main() {
	resp, err := http.Get("https://example.com")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// http.Get reports no error for 4xx/5xx replies, and such replies still
	// carry a body (typically an error page). Stop instead of parsing that
	// page as if it were the requested document.
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("unexpected status: %s", resp.Status)
	}

	document, err := saihon.Parse(resp.Body)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println(document.Title())
}
Output:

Example (String)

This example shows how to use parse from string

package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/saihon/saihon"
)

// main parses an HTML document held in a string and prints its title.
func main() {
	const src = `<html><head></head><body></body></html>`
	reader := strings.NewReader(src)

	doc, parseErr := saihon.Parse(reader)
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	title := doc.Title()
	fmt.Println(title)
}
Output:

func (Document) All

func (d Document) All() Collection

All returns all elements of node type ElementNode

func (Document) Anchors

func (d Document) Anchors() Collection

Anchors returns all <a> elements that have a "name" attribute

func (Document) AppendChild

func (d Document) AppendChild(c *Element)

AppendChild append "*Element" as a last child

func (Document) Body

func (d Document) Body() *Element

Body returns <body> element

func (Document) ChildElementCount

func (d Document) ChildElementCount() int

ChildElementCount returns the number of html.ElementNode

func (Document) ChildNodes

func (d Document) ChildNodes() []*html.Node

ChildNodes returns all of child nodes

func (Document) Children

func (d Document) Children() Collection

Children returns all of the child html.ElementNode as the "Collection"

func (Document) CloneNode

func (d Document) CloneNode() *Document

CloneNode clone "Document"

func (Document) CreateElement

func (_ Document) CreateElement(tagname string) *Element

CreateElement can be called on a "Document", but the receiver has no meaning; it is the same as the package-level CreateElement function

func (Document) CreateTextNode

func (_ Document) CreateTextNode(text string) *Element

CreateTextNode can likewise be called on a "Document"; it is the same as the package-level CreateTextNode function

func (Document) DocumentElement

func (d Document) DocumentElement() *Element

DocumentElement returns <html> element

func (Document) FirstChild

func (d Document) FirstChild() *html.Node

FirstChild returns first child node

func (Document) FirstElementChild

func (d Document) FirstElementChild() *Element

FirstElementChild returns first html.ElementNode as the "*Element"

func (Document) Form

func (d Document) Form() Collection

Form returns all <form> elements

func (Document) GetByClass

func (d Document) GetByClass(classname string) Collection

GetByClass alias `GetElementsByClassName'

func (Document) GetById

func (d Document) GetById(id string) *Element

GetById alias `GetElementById'

func (Document) GetByName

func (d Document) GetByName(name string) Collection

GetByName alias `GetElementsByName'

func (Document) GetByTag

func (d Document) GetByTag(tagname string) Collection

GetByTag alias `GetElementsByTagName'

func (Document) GetElementById

func (d Document) GetElementById(id string) *Element

GetElementById find the element have specified id

func (Document) GetElementsByClassName

func (d Document) GetElementsByClassName(classname string) Collection

GetElementsByClassName find the all elements have specified classname

func (Document) GetElementsByName

func (d Document) GetElementsByName(name string) Collection

GetElementsByName find the all elements have specified name

func (Document) GetElementsByTagName

func (d Document) GetElementsByTagName(tagname string) Collection

GetElementsByTagName find the all elements have specified tagname

func (Document) HasChildNodes

func (d Document) HasChildNodes() bool

HasChildNodes returns true if "Document" has node

func (Document) Head

func (d Document) Head() *Element

Head returns <head> element

func (Document) Images

func (d Document) Images() Collection

Images returns all <img> elements

func (Document) InsertBefore

func (d Document) InsertBefore(newChild, oldChild *Element)

InsertBefore inserts a newElement before the oldElement as a child of a "Document".

func (Document) LastChild

func (d Document) LastChild() *html.Node

LastChild returns last child node

func (Document) LastElementChild

func (d Document) LastElementChild() *Element

LastElementChild returns the last child html.ElementNode as the "*Element"

func (Document) Links

func (d Document) Links() Collection

Links returns all <a> and <area> elements that have an "href" attribute

func (Document) NextElementSibling

func (d Document) NextElementSibling() *Element

NextElementSibling - returns nil!!

func (Document) NextSibling

func (d Document) NextSibling() *html.Node

NextSibling - returns nil!!

func (Document) ParentElement

func (d Document) ParentElement() *Element

ParentElement - returns nil!!

func (Document) ParentNode

func (d Document) ParentNode() *html.Node

ParentNode - returns nil!!

func (Document) PreviousElementSibling

func (d Document) PreviousElementSibling() *Element

PreviousElementSibling - returns nil!!

func (Document) PreviousSibling

func (d Document) PreviousSibling() *html.Node

PreviousSibling - returns nil!!

func (Document) Query

func (d Document) Query(selector string) *Element

Query alias `QuerySelector'

func (Document) QueryAll

func (d Document) QueryAll(selector string) Collection

QueryAll alias `QuerySelectorAll'

func (Document) QuerySelector

func (d Document) QuerySelector(s string) *Element

QuerySelector find the first element have specified css selector

func (Document) QuerySelectorAll

func (d Document) QuerySelectorAll(s string) Collection

QuerySelectorAll find the all elements have specified css selector

func (Document) RemoveChild

func (d Document) RemoveChild(c *Element)

RemoveChild removes the given "*Element". The specified "*Element" must be a child of the "Document"

func (Document) ReplaceChild

func (d Document) ReplaceChild(newElement, oldElement *Element) *Element

ReplaceChild replaces oldElement with newElement. oldElement must be a child of the "Document", and both elements must have the same node type

func (Document) TextContent

func (d Document) TextContent(text ...string) string

TextContent - returns an empty string!! (the return type is string, so it cannot be nil)

func (Document) Title

func (d Document) Title() string

Title returns a title text

type Element

// Element wraps a single *html.Node and carries the DOM-style methods of this
// package. Besides element nodes it is also used for text nodes, since
// CreateTextNode returns an *Element.
type Element struct {
	// Node is the wrapped html node.
	Node *html.Node
}

Element

func CreateElement

func CreateElement(tagname string) *Element

CreateElement create the html.ElementNode with specified tag name and then return as the "*Element"

func CreateTextNode

func CreateTextNode(text string) *Element

CreateTextNode create the html.TextNode with specified text and returns the "*Element"

func (Element) AppendChild

func (e Element) AppendChild(c *Element)

AppendChild append "*Element" as last child

func (Element) Attributes

func (e Element) Attributes() []html.Attribute

Attributes returns all attributes on the element

func (Element) ChildElementCount

func (e Element) ChildElementCount() int

ChildElementCount returns the number of html.ElementNode

func (Element) ChildNodes

func (e Element) ChildNodes() []*html.Node

ChildNodes returns all of child nodes

func (Element) Children

func (e Element) Children() Collection

Children returns all of child html.ElementNode as "Collection"

func (Element) ClassList

func (e Element) ClassList() DOMTokenList

ClassList

func (Element) ClassName

func (e Element) ClassName() string

ClassName returns the value of the class attribute, or an empty string if the element has no class

func (Element) CloneNode

func (e Element) CloneNode() *Element

CloneNode returns clone "*Element"

func (Element) FirstChild

func (e Element) FirstChild() *html.Node

FirstChild returns first child node

func (Element) FirstElementChild

func (e Element) FirstElementChild() *Element

FirstElementChild returns first html.ElementNode as "*Element"

func (Element) GetAttribute

func (e Element) GetAttribute(key string) string

GetAttribute returns the value of a specified attribute

func (Element) GetAttributeNS

func (e Element) GetAttributeNS(namespace, key string) string

GetAttributeNS returns the value of the attribute with the specified namespace and key

func (Element) GetAttributeNode

func (e Element) GetAttributeNode(key string) (html.Attribute, bool)

GetAttributeNode returns the attribute, and the bool value indicating whether it exists with the specified key

func (Element) GetAttributeNodeNS

func (e Element) GetAttributeNodeNS(namespace, key string) (html.Attribute, bool)

GetAttributeNodeNS returns the attribute, and the bool value indicating whether it exists with the specified namespace and key

func (Element) GetByClass

func (e Element) GetByClass(classname string) Collection

GetByClass alias `GetElementsByClassName'

func (Element) GetById

func (e Element) GetById(id string) *Element

GetById alias `GetElementById'

func (Element) GetByName

func (e Element) GetByName(name string) Collection

GetByName alias `GetElementsByName'

func (Element) GetByTag

func (e Element) GetByTag(tagname string) Collection

GetByTag alias `GetElementsByTagName'

func (Element) GetElementById

func (e Element) GetElementById(id string) *Element

GetElementById returns the element that has the given id

func (Element) GetElementsByClassName

func (e Element) GetElementsByClassName(classname string) Collection

GetElementsByClassName returns all elements that have the given classname

func (Element) GetElementsByName

func (e Element) GetElementsByName(name string) Collection

GetElementsByName returns all elements that have the given name

func (Element) GetElementsByTagName

func (e Element) GetElementsByTagName(tagname string) Collection

GetElementsByTagName returns all elements that have the given tagname

func (Element) HasAttribute

func (e Element) HasAttribute(key string) bool

HasAttribute returns the bool value indicating whether element has an attribute with specified key

func (Element) HasAttributeNS

func (e Element) HasAttributeNS(namespace, key string) bool

HasAttributeNS

func (Element) HasAttributes

func (e Element) HasAttributes() bool

HasAttributes returns the bool value indicating whether element has attributes

func (Element) HasChildNodes

func (e Element) HasChildNodes() bool

HasChildNodes returns true if the element has child nodes

func (Element) Id

func (e Element) Id() string

Id returns the value of the id attribute, or an empty string if the element has no id

func (Element) InnerHTML

func (e Element) InnerHTML(text ...string) string

InnerHTML set or get inner html to an element

func (Element) InnerText

func (e Element) InnerText(text ...string) string

InnerText set or get text to an element

func (Element) InsertAdjacentElement

func (e Element) InsertAdjacentElement(p Position, newElement *Element) error

InsertAdjacentElement inserts element to specified position

func (Element) InsertAdjacentHTML

func (e Element) InsertAdjacentHTML(p Position, texthtml string) error

InsertAdjacentHTML inserts text HTML as the html.ElementNode to specified position

func (Element) InsertAdjacentText

func (e Element) InsertAdjacentText(p Position, text string) error

InsertAdjacentText inserts text as the html.TextNode to specified position

func (Element) InsertBefore

func (e Element) InsertBefore(newChild, oldChild *Element)

InsertBefore inserts a newChild before the oldChild as child

func (Element) LastChild

func (e Element) LastChild() *html.Node

LastChild returns last child node

func (Element) LastElementChild

func (e Element) LastElementChild() *Element

LastElementChild returns last html.ElementNode as "*Element"

func (Element) LocalName

func (e Element) LocalName() string

LocalName returns string as lowercase

func (Element) NextElementSibling

func (e Element) NextElementSibling() *Element

NextElementSibling returns next html.ElementNode as "*Element"

func (Element) NextSibling

func (e Element) NextSibling() *html.Node

NextSibling returns next sibling node

func (Element) OuterHTML

func (e Element) OuterHTML() string

OuterHTML include element itself

func (Element) ParentElement

func (e Element) ParentElement() *Element

ParentElement returns parent node as "*Element"

func (Element) ParentNode

func (e Element) ParentNode() *html.Node

ParentNode returns parent node

func (Element) PreviousElementSibling

func (e Element) PreviousElementSibling() *Element

PreviousElementSibling returns previous html.ElementNode as "*Element"

func (Element) PreviousSibling

func (e Element) PreviousSibling() *html.Node

PreviousSibling returns previous sibling node

func (Element) Query

func (e Element) Query(selector string) *Element

Query alias `QuerySelector'

func (Element) QueryAll

func (e Element) QueryAll(selector string) Collection

QueryAll alias `QuerySelectorAll'

func (Element) QuerySelector

func (e Element) QuerySelector(s string) *Element

QuerySelector returns the first element that matches the given css selector

func (Element) QuerySelectorAll

func (e Element) QuerySelectorAll(s string) Collection

QuerySelectorAll returns all elements that match the given css selector

func (Element) Remove

func (e Element) Remove()

Remove delete Element itself

func (Element) RemoveAttribute

func (e Element) RemoveAttribute(key string)

RemoveAttribute

func (Element) RemoveAttributeNS

func (e Element) RemoveAttributeNS(namespace, key string)

RemoveAttributeNS

func (Element) RemoveAttributeNode

func (e Element) RemoveAttributeNode(a html.Attribute)

RemoveAttributeNode

func (Element) RemoveChild

func (e Element) RemoveChild(c *Element)

RemoveChild removes the given "*Element". The specified *Element must be a child of this element

func (Element) ReplaceChild

func (e Element) ReplaceChild(newElement, oldElement *Element) *Element

ReplaceChild returns old element. panic if an error

func (Element) SetAttribute

func (e Element) SetAttribute(key string, value string)

SetAttribute sets the value of an attribute on the element

func (Element) SetAttributeNS

func (e Element) SetAttributeNS(namespace, key, value string)

SetAttributeNS sets the value of an attribute with the specified namespace and name

func (Element) SetAttributeNode

func (e Element) SetAttributeNode(a html.Attribute)

SetAttributeNode sets the attribute. If the key already exists, the existing attribute is overridden

func (Element) SetAttributeNodeNS

func (e Element) SetAttributeNodeNS(a html.Attribute)

SetAttributeNodeNS sets the namespaced attribute node on the element

func (Element) TagName

func (e Element) TagName() string

TagName returns string as uppercase

func (Element) TextContent

func (e Element) TextContent(text ...string) string

TextContent set or get text to an element

type Position

// Position identifies where, relative to a target element, the
// InsertAdjacentElement, InsertAdjacentHTML and InsertAdjacentText methods
// place the new content. Use the constants Beforebegin, Afterbegin, Beforeend
// and Afterend.
type Position int

Position

<!-- beforebegin --> <p>

<!-- afterbegin -->
childnodes
<!-- beforeend -->

</p> <!-- afterend -->

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL