browser

package
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 8, 2021 License: MIT Imports: 17 Imported by: 115

Documentation

Overview

Package browser contains the primary browser implementation.

Index

Constants

This section is empty.

Variables

View Source
var InitialAssetsSliceSize = 20

InitialAssetsSliceSize is the initial size when allocating a slice of page assets. Increasing this size may lead to a very small performance increase when downloading assets from a page with a lot of assets.

Functions

func DownloadAsset

func DownloadAsset(asset Downloadable, out io.Writer) (int64, error)

DownloadAsset copies a remote file to the given writer.

func DownloadAssetAsync

func DownloadAssetAsync(asset Downloadable, out io.Writer, c AsyncDownloadChannel)

DownloadAssetAsync downloads an asset asynchronously and notifies the given channel when the download is complete.

Types

type Asset

type Asset struct {
	// ID is the value of the id attribute if available.
	ID string

	// URL is the asset URL.
	URL *url.URL

	// Type describes the type of asset.
	Type AssetType
}

Asset implements Assetable.

func (*Asset) AssetType

func (at *Asset) AssetType() AssetType

AssetType returns the asset type.

func (*Asset) Id

func (at *Asset) Id() string

Id returns the asset ID or an empty string when not available.

func (*Asset) Url

func (at *Asset) Url() *url.URL

Url returns the asset URL.

type AssetType

type AssetType uint16

AssetType describes a type of page asset, such as an image or stylesheet.

const (
	// LinkAsset describes a *Link asset.
	LinkAsset AssetType = iota

	// ImageAsset describes an *Image asset.
	ImageAsset

	// StylesheetAsset describes a *Stylesheet asset.
	StylesheetAsset

	// ScriptAsset describes a *Script asset.
	ScriptAsset
)

type Assetable

// Assetable represents a page asset, such as an image or stylesheet.
type Assetable interface {
	// Url returns the asset URL.
	Url() *url.URL

	// Id returns the asset ID or an empty string when not available.
	Id() string

	// AssetType returns the type of asset.
	AssetType() AssetType
}

Assetable represents a page asset, such as an image or stylesheet.

type AsyncDownloadChannel

type AsyncDownloadChannel chan *AsyncDownloadResult

AsyncDownloadChannel is a channel upon which the results of an async download are passed.

type AsyncDownloadResult

type AsyncDownloadResult struct {
	// Asset is a pointer to the Downloadable asset that was downloaded.
	Asset Downloadable

	// Writer where the asset data was written.
	Writer io.Writer

	// Size is the number of bytes written to the io.Writer.
	Size int64

	// Error contains any error that occurred during the download or nil.
	Error error
}

AsyncDownloadResult has the results of an asynchronous download.

type Attribute

type Attribute int

Attribute represents a Browser capability.

const (
	// SendReferer instructs a Browser to send the Referer header.
	SendReferer Attribute = iota

	// MetaRefreshHandling instructs a Browser to handle the refresh meta tag.
	MetaRefreshHandling

	// FollowRedirects instructs a Browser to follow Location headers.
	FollowRedirects
)

type AttributeMap

type AttributeMap map[Attribute]bool

AttributeMap represents a map of Attribute values.

type Browsable

// Browsable represents an HTTP web browser.
type Browsable interface {
	// SetUserAgent sets the user agent.
	SetUserAgent(ua string)

	// SetAttribute sets a browser instruction attribute.
	SetAttribute(a Attribute, v bool)

	// SetAttributes is used to set all the browser attributes.
	SetAttributes(a AttributeMap)

	// SetState sets the browser state.
	SetState(sj *jar.State)

	// State returns the browser state.
	State() *jar.State

	// SetBookmarksJar sets the bookmarks jar the browser uses.
	SetBookmarksJar(bj jar.BookmarksJar)

	// BookmarksJar returns the bookmarks jar the browser uses.
	BookmarksJar() jar.BookmarksJar

	// SetCookieJar is used to set the cookie jar the browser uses.
	SetCookieJar(cj http.CookieJar)

	// CookieJar returns the cookie jar the browser uses.
	CookieJar() http.CookieJar

	// SetHistoryJar is used to set the history jar the browser uses.
	SetHistoryJar(hj jar.History)

	// HistoryJar returns the history jar the browser uses.
	HistoryJar() jar.History

	// SetHeadersJar sets the headers the browser sends with each request.
	SetHeadersJar(h http.Header)

	// SetTimeout sets the timeout for requests.
	SetTimeout(t time.Duration)

	// SetTransport sets the http library transport mechanism for each request.
	SetTransport(rt http.RoundTripper)

	// AddRequestHeader adds a header the browser sends with each request.
	AddRequestHeader(name, value string)

	// Open requests the given URL using the GET method.
	Open(url string) error

	// Head requests the given URL using the HEAD method.
	Head(url string) error

	// OpenForm appends the data values to the given URL and sends a GET request.
	OpenForm(url string, data url.Values) error

	// OpenBookmark calls Open() with the URL for the bookmark with the given name.
	OpenBookmark(name string) error

	// Post requests the given URL using the POST method.
	Post(url string, contentType string, body io.Reader) error

	// PostForm requests the given URL using the POST method with the given data.
	PostForm(url string, data url.Values) error

	// PostMultipart requests the given URL using the POST method with the given data using multipart/form-data format.
	PostMultipart(u string, fields url.Values, files FileSet) error

	// Back loads the previously requested page.
	Back() bool

	// Reload duplicates the last successful request.
	Reload() error

	// Bookmark saves the page URL in the bookmarks with the given name.
	Bookmark(name string) error

	// Click clicks on the page element matched by the given expression.
	Click(expr string) error

	// Form returns the form in the current page that matches the given expr.
	Form(expr string) (Submittable, error)

	// Forms returns an array of every form in the page.
	Forms() []Submittable

	// Links returns an array of every link found in the page.
	Links() []*Link

	// Images returns an array of every image found in the page.
	Images() []*Image

	// Stylesheets returns an array of every stylesheet linked to the document.
	Stylesheets() []*Stylesheet

	// Scripts returns an array of every script linked to the document.
	Scripts() []*Script

	// SiteCookies returns the cookies for the current site.
	SiteCookies() []*http.Cookie

	// ResolveUrl returns an absolute URL for a possibly relative URL.
	ResolveUrl(u *url.URL) *url.URL

	// ResolveStringUrl works just like ResolveUrl, but the argument and return value are strings.
	ResolveStringUrl(u string) (string, error)

	// Download writes the contents of the document to the given writer.
	Download(o io.Writer) (int64, error)

	// Url returns the page URL.
	Url() *url.URL

	// StatusCode returns the response status code.
	StatusCode() int

	// Title returns the page title.
	Title() string

	// ResponseHeaders returns the page headers.
	ResponseHeaders() http.Header

	// Body returns the page body as a string of html.
	Body() string

	// Dom returns the inner *goquery.Selection.
	Dom() *goquery.Selection

	// Find returns the dom selections matching the given expression.
	Find(expr string) *goquery.Selection

	// NewTab creates a new Browser instance that inherits the configuration.
	// Read more: https://github.com/headzoo/surf/issues/23
	NewTab() (b *Browser)
}

Browsable represents an HTTP web browser.

type Browser

type Browser struct {
	// contains filtered or unexported fields
}

Browser is the default Browser implementation.

func (*Browser) AddRequestHeader

func (bow *Browser) AddRequestHeader(name, value string)

AddRequestHeader sets a header the browser sends with each request.

func (*Browser) Back

func (bow *Browser) Back() bool

Back loads the previously requested page.

Returns a boolean value indicating whether a previous page existed, and was successfully loaded.

func (*Browser) Body

func (bow *Browser) Body() string

Body returns the page body as a string of html.

func (*Browser) Bookmark

func (bow *Browser) Bookmark(name string) error

Bookmark saves the page URL in the bookmarks with the given name.

func (*Browser) BookmarksJar added in v1.0.1

func (bow *Browser) BookmarksJar() jar.BookmarksJar

BookmarksJar returns the bookmarks jar the browser uses.

func (*Browser) Click

func (bow *Browser) Click(expr string) error

Click clicks on the page element matched by the given expression.

Currently this is only useful for clicking on links, which will cause the browser to load the page pointed at by the link. Future versions of Surf may support JavaScript, in which case clicking on elements will fire the click event.

func (*Browser) CookieJar added in v1.0.1

func (bow *Browser) CookieJar() http.CookieJar

CookieJar returns the cookie jar the browser uses.

func (*Browser) DelRequestHeader

func (bow *Browser) DelRequestHeader(name string)

DelRequestHeader deletes a header so the browser will not send it with future requests.

func (*Browser) Dom

func (bow *Browser) Dom() *goquery.Selection

Dom returns the inner *goquery.Selection.

func (*Browser) Download

func (bow *Browser) Download(o io.Writer) (int64, error)

Download writes the contents of the document to the given writer.

func (*Browser) Find

func (bow *Browser) Find(expr string) *goquery.Selection

Find returns the dom selections matching the given expression.

func (*Browser) Form

func (bow *Browser) Form(expr string) (Submittable, error)

Form returns the form in the current page that matches the given expr.

func (*Browser) Forms

func (bow *Browser) Forms() []Submittable

Forms returns an array of every form in the page.

func (*Browser) Head

func (bow *Browser) Head(u string) error

Head requests the given URL using the HEAD method.

func (*Browser) HistoryJar added in v1.0.1

func (bow *Browser) HistoryJar() jar.History

HistoryJar returns the history jar the browser uses.

func (*Browser) Images

func (bow *Browser) Images() []*Image

Images returns an array of every image found in the page.

func (bow *Browser) Links() []*Link

Links returns an array of every link found in the page.

func (*Browser) NewTab added in v1.0.1

func (bow *Browser) NewTab() (b *Browser)

func (*Browser) Open

func (bow *Browser) Open(u string) error

Open requests the given URL using the GET method.

func (*Browser) OpenBookmark

func (bow *Browser) OpenBookmark(name string) error

OpenBookmark calls Open() with the URL for the bookmark with the given name.

func (*Browser) OpenForm

func (bow *Browser) OpenForm(u string, data url.Values) error

OpenForm appends the data values to the given URL and sends a GET request.

func (*Browser) Post

func (bow *Browser) Post(u string, contentType string, body io.Reader) error

Post requests the given URL using the POST method.

func (*Browser) PostForm

func (bow *Browser) PostForm(u string, data url.Values) error

PostForm requests the given URL using the POST method with the given data.

func (*Browser) PostMultipart

func (bow *Browser) PostMultipart(u string, fields url.Values, files FileSet) error

PostMultipart requests the given URL using the POST method with the given data using multipart/form-data format.

func (*Browser) RelativeUrl added in v1.0.1

func (bow *Browser) RelativeUrl() *url.URL

RelativeUrl returns the URL relative to which all other URLs are resolved.

func (*Browser) Reload

func (bow *Browser) Reload() error

Reload duplicates the last successful request.

func (*Browser) ResolveStringUrl

func (bow *Browser) ResolveStringUrl(u string) (string, error)

ResolveStringUrl works just like ResolveUrl, but the argument and return value are strings.

func (*Browser) ResolveUrl

func (bow *Browser) ResolveUrl(u *url.URL) *url.URL

ResolveUrl returns an absolute URL for a possibly relative URL.

func (*Browser) ResponseHeaders

func (bow *Browser) ResponseHeaders() http.Header

ResponseHeaders returns the page headers.

func (*Browser) Scripts

func (bow *Browser) Scripts() []*Script

Scripts returns an array of every script linked to the document.

func (*Browser) SetAttribute

func (bow *Browser) SetAttribute(a Attribute, v bool)

SetAttribute sets a browser instruction attribute.

func (*Browser) SetAttributes

func (bow *Browser) SetAttributes(a AttributeMap)

SetAttributes is used to set all the browser attributes.

func (*Browser) SetBookmarksJar

func (bow *Browser) SetBookmarksJar(bj jar.BookmarksJar)

SetBookmarksJar sets the bookmarks jar the browser uses.

func (*Browser) SetCookieJar

func (bow *Browser) SetCookieJar(cj http.CookieJar)

SetCookieJar is used to set the cookie jar the browser uses.

func (*Browser) SetHeadersJar

func (bow *Browser) SetHeadersJar(h http.Header)

SetHeadersJar sets the headers the browser sends with each request.

func (*Browser) SetHistoryJar

func (bow *Browser) SetHistoryJar(hj jar.History)

SetHistoryJar is used to set the history jar the browser uses.

func (*Browser) SetState

func (bow *Browser) SetState(sj *jar.State)

SetState sets the browser state.

func (*Browser) SetTimeout added in v1.0.1

func (bow *Browser) SetTimeout(t time.Duration)

SetTimeout sets the timeout for requests.

func (*Browser) SetTransport

func (bow *Browser) SetTransport(rt http.RoundTripper)

SetTransport sets the http library transport mechanism for each request.

func (*Browser) SetUserAgent

func (bow *Browser) SetUserAgent(userAgent string)

SetUserAgent sets the user agent.

func (*Browser) SiteCookies

func (bow *Browser) SiteCookies() []*http.Cookie

SiteCookies returns the cookies for the current site.

func (*Browser) State added in v1.0.1

func (bow *Browser) State() *jar.State

State returns the browser state.

func (*Browser) StatusCode

func (bow *Browser) StatusCode() int

StatusCode returns the response status code.

func (*Browser) Stylesheets

func (bow *Browser) Stylesheets() []*Stylesheet

Stylesheets returns an array of every stylesheet linked to the document.

func (*Browser) Title

func (bow *Browser) Title() string

Title returns the page title.

func (*Browser) Url

func (bow *Browser) Url() *url.URL

Url returns the page URL.

type Downloadable

type Downloadable interface {
	Assetable

	// Download writes the contents of the element to the given writer.
	//
	// Returns the number of bytes written.
	Download(out io.Writer) (int64, error)

	// DownloadAsync downloads the contents of the element asynchronously.
	//
	// An instance of AsyncDownloadResult will be sent down the given channel
	// when the download is complete.
	DownloadAsync(out io.Writer, ch AsyncDownloadChannel)
}

Downloadable represents an asset that may be downloaded.

type DownloadableAsset

type DownloadableAsset struct {
	Asset
}

DownloadableAsset is an asset that may be downloaded.

func (*DownloadableAsset) Download

func (at *DownloadableAsset) Download(out io.Writer) (int64, error)

Download writes the asset to the given io.Writer type.

func (*DownloadableAsset) DownloadAsync

func (at *DownloadableAsset) DownloadAsync(out io.Writer, ch AsyncDownloadChannel)

DownloadAsync downloads the asset asynchronously.

type File

type File struct {
	// contains filtered or unexported fields
}

File represents a form input of type file; it holds the file name and an io.Reader for the file data.

type FileSet

type FileSet map[string]*File

FileSet represents a map of files used to post multipart form data.

type Form

type Form struct {
	// contains filtered or unexported fields
}

Form is the default form element.

func NewForm

func NewForm(bow Browsable, s *goquery.Selection) *Form

NewForm creates and returns a *Form type.

func (*Form) Action

func (f *Form) Action() string

Action returns the form action URL. The URL will always be absolute.

func (*Form) Check added in v1.0.1

func (f *Form) Check(name string) error

Check sets the checkbox value to its active state.

func (*Form) Click

func (f *Form) Click(button string) error

Click submits the form by clicking the button with the given name.

func (*Form) ClickByValue

func (f *Form) ClickByValue(name, value string) error

ClickByValue submits the form by clicking the button with the given name and value.

func (*Form) Dom

func (f *Form) Dom() *goquery.Selection

Dom returns the inner *goquery.Selection.

func (*Form) File

func (f *Form) File(name string, fileName string, data io.Reader) error

File sets the value for a form input of type file. It returns an ElementNotFound error if the field does not exist.

func (*Form) Get added in v1.0.1

func (f *Form) Get(name string) []string

Get returns the values of the form field with the given name.

func (*Form) Input

func (f *Form) Input(name, value string) error

Input sets the value of a form field. It returns an ElementNotFound error if the field does not exist.

func (*Form) IsChecked added in v1.0.1

func (f *Form) IsChecked(name string) (bool, error)

IsChecked returns the current state of the checkbox

func (*Form) Method

func (f *Form) Method() string

Method returns the form method, eg "GET" or "POST".

func (*Form) Remove added in v1.0.1

func (f *Form) Remove(name string)

Remove will remove the form field if it exists.

func (*Form) RemoveValue added in v1.0.1

func (f *Form) RemoveValue(name, val string) error

RemoveValue will remove a single instance of a form value whose name and value match. This is valuable for removing a single value from a select multiple.

func (*Form) SelectByOptionLabel added in v1.0.1

func (f *Form) SelectByOptionLabel(name string, optionLabel ...string) error

SelectByOptionLabel sets the current value of a select form element according to the option's label. If the element is a select multiple, multiple options may be selected.

func (*Form) SelectByOptionValue added in v1.0.1

func (f *Form) SelectByOptionValue(name string, optionValue ...string) error

SelectByOptionValue sets the current value of a select form element according to the option's value. If the element is a select multiple, multiple options may be selected.

func (*Form) SelectLabels added in v1.0.1

func (f *Form) SelectLabels(name string) ([]string, error)

SelectLabels returns the labels for the selected options for a select form element whose name matches. If name is not found, error is returned.

func (*Form) SelectValues added in v1.0.1

func (f *Form) SelectValues(name string) ([]string, error)

SelectValues returns the current values of a form element whose name matches. If name is not found, error is returned. For select multiple elements, all values are returned.

func (*Form) Set

func (f *Form) Set(name, value string) error

Set will set the value of a form field if it exists, or create and set it if it does not.

func (*Form) SetFile

func (f *Form) SetFile(name string, fileName string, data io.Reader)

SetFile sets the value for a form input type file. It will add the field to the form if necessary

func (*Form) Submit

func (f *Form) Submit() error

Submit submits the form. Clicks the first button in the form, or submits the form without using any button when the form does not contain any buttons.

func (*Form) UnCheck added in v1.0.1

func (f *Form) UnCheck(name string) error

UnCheck sets the checkbox value to inactive state.

func (*Form) Value added in v1.0.1

func (f *Form) Value(name string) (string, error)

Value returns the current value of a form element whose name matches. If name is not found, error is returned. For multiple value form element such as select multiple, the first value is returned.

type Image

type Image struct {
	DownloadableAsset

	// Alt is the value of the image alt attribute if available.
	Alt string

	// Title is the value of the image title attribute if available.
	Title string
}

Image stores the properties of an image.

func NewImageAsset

func NewImageAsset(url *url.URL, id, alt, title string) *Image

NewImageAsset creates and returns a new *Image type.

type Link struct {
	Asset

	// Text is the text appearing between the opening and closing anchor tag.
	Text string
}

Link stores the properties of a page link.

func NewLinkAsset

func NewLinkAsset(u *url.URL, id, text string) *Link

NewLinkAsset creates and returns a new *Link type.

type Script

type Script struct {
	DownloadableAsset

	// Type is the value of the type attribute. Defaults to "text/javascript" when not specified.
	Type string
}

Script stores the properties of a linked script.

func NewScriptAsset

func NewScriptAsset(url *url.URL, id, typ string) *Script

NewScriptAsset creates and returns a new *Script type.

type Stylesheet

type Stylesheet struct {
	DownloadableAsset

	// Media is the value of the media attribute. Defaults to "all" when not specified.
	Media string

	// Type is the value of the type attribute. Defaults to "text/css" when not specified.
	Type string
}

Stylesheet stores the properties of a linked stylesheet.

func NewStylesheetAsset

func NewStylesheetAsset(url *url.URL, id, media, typ string) *Stylesheet

NewStylesheetAsset creates and returns a new *Stylesheet type.

type Submittable

// Submittable represents an element that may be submitted, such as a form.
type Submittable interface {
	// Method returns the form method, eg "GET" or "POST".
	Method() string
	// Action returns the form action URL.
	Action() string
	// Input sets the value of a form field. It returns an ElementNotFound
	// error if the field does not exist.
	Input(name, value string) error
	// Set sets the value of a form field if it exists, or creates and sets it
	// if it does not.
	Set(name, value string) error

	// Remove will remove the input completely from the form.
	Remove(name string)

	// RemoveValue will remove a single instance of a form value whose name and value match.
	// This is valuable for removing a single value from a select multiple.
	RemoveValue(name, value string) error

	// Value returns the current value of a form element whose name matches.  If name is not
	// found, error is returned.  For multiple value form element such as select multiple,
	// the first value is returned.
	Value(name string) (string, error)

	// Check will set a checkbox to its active state.  This is done by adding it to
	// the form and setting its value to the value attribute defined in the form.
	Check(name string) error

	// UnCheck will set a checkbox to its inactive state.  This is done by removing
	// it from the form.
	UnCheck(name string) error

	// IsChecked returns a boolean indicating if the checkbox is active or inactive.
	IsChecked(name string) (bool, error)

	// SelectByOptionLabel sets the current value of a select form element according to the
	// option's label.  If the element is a select multiple, multiple options may be selected.
	SelectByOptionLabel(name string, optionLabel ...string) error

	// SelectByOptionValue sets the current value of a select form element according to the
	// option's value.  If the element is a select multiple, multiple options may be selected.
	SelectByOptionValue(name string, optionValue ...string) error

	// SelectValues returns the current values of a form element whose name matches.  If name is not
	// found, error is returned.  For select multiple elements, all values are returned.
	SelectValues(name string) ([]string, error)

	// SelectLabels returns the labels for the selected options for a select form element whose name
	// matches.  If name is not found, error is returned.
	SelectLabels(name string) ([]string, error)

	// File sets the value for a form input of type file.
	// It returns an ElementNotFound error if the field does not exist.
	File(name string, fileName string, data io.Reader) error

	// SetFile sets the value for a form input type file.
	// It will add the field to the form if necessary.
	SetFile(name string, fileName string, data io.Reader)

	// Click submits the form by clicking the button with the given name.
	Click(button string) error
	// ClickByValue submits the form by clicking the button with the given
	// name and value.
	ClickByValue(name, value string) error
	// Submit submits the form.
	Submit() error
	// Dom returns the inner *goquery.Selection.
	Dom() *goquery.Selection
}

Submittable represents an element that may be submitted, such as a form.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL