sanitizer

package
v0.7.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 24, 2020 License: BSD-2-Clause Imports: 6 Imported by: 0

Documentation

Overview

Package sanitizer provides functions for sanitizing HTML text.

Index

Constants

This section is empty.

Variables

View Source
var (
	// EPUBElmt is the default white-list of EPUB tags. Each accepted tag maps
	// to the list of attributes allowed on it. Most tags accept only
	// globalHTMLAttr (declared elsewhere in the package); a few prepend
	// tag-specific attributes to it.
	//
	// NOTE: entries set directly to globalHTMLAttr all refer to the same
	// slice value, whereas the append(...) entries are built as new slices.
	EPUBElmt = map[atom.Atom][]string{
		atom.A:          append([]string{"href"}, globalHTMLAttr...),
		atom.B:          globalHTMLAttr,
		atom.Blockquote: globalHTMLAttr,
		atom.Body:       globalHTMLAttr,
		atom.Br:         globalHTMLAttr,
		atom.Caption:    globalHTMLAttr,
		atom.Cite:       globalHTMLAttr,
		atom.Col:        append([]string{"span"}, globalHTMLAttr...),
		atom.Colgroup:   append([]string{"span"}, globalHTMLAttr...),
		atom.Dd:         globalHTMLAttr,
		atom.Del:        globalHTMLAttr,
		atom.Dfn:        globalHTMLAttr,
		atom.Div:        globalHTMLAttr,
		atom.Em:         globalHTMLAttr,
		atom.H1:         globalHTMLAttr,
		atom.H2:         globalHTMLAttr,
		atom.H3:         globalHTMLAttr,
		atom.H4:         globalHTMLAttr,
		atom.H5:         globalHTMLAttr,
		atom.H6:         globalHTMLAttr,
		// Head is accepted with an empty attribute list.
		atom.Head:       {},
		atom.Hr:         globalHTMLAttr,
		// Html lists only these three attributes; globalHTMLAttr is not included.
		atom.Html:       {"lang", "xmlns", "xml:lang"},
		atom.I:          globalHTMLAttr,
		atom.Img:        append([]string{"height", "src", "width"}, globalHTMLAttr...),
		atom.Li:         globalHTMLAttr,
		// Link uses "name=value" entries; presumably these restrict the
		// attribute to that value (see the expression syntax documented on
		// HTML.SafeElements) — confirm against the implementation.
		atom.Link:       {"href", "rel=stylesheet", "type=text/css"},
		atom.Meta:       append([]string{"charset", "content", "name"}, globalHTMLAttr...),
		atom.Ol:         globalHTMLAttr,
		atom.P:          globalHTMLAttr,
		atom.S:          globalHTMLAttr,
		atom.Small:      globalHTMLAttr,
		atom.Span:       globalHTMLAttr,
		atom.Strong:     globalHTMLAttr,
		atom.Sub:        globalHTMLAttr,
		atom.Sup:        globalHTMLAttr,
		atom.Table:      globalHTMLAttr,
		atom.Tbody:      globalHTMLAttr,
		atom.Td:         append([]string{"colspan", "rowspan"}, globalHTMLAttr...),
		atom.Tfoot:      globalHTMLAttr,
		atom.Th:         append([]string{"abbr", "colspan", "rowspan"}, globalHTMLAttr...),
		atom.Thead:      globalHTMLAttr,
		atom.Title:      globalHTMLAttr,
		atom.Tr:         globalHTMLAttr,
		atom.U:          globalHTMLAttr,
		atom.Ul:         globalHTMLAttr,
	}

	// EPUBSchemes is the default white-list of URL schemes accepted in an
	// EPUB's links.
	EPUBSchemes = []string{"http", "https", "mailto"}

	// EPUB is the default EPUB sanitizer, built from EPUBElmt and EPUBSchemes.
	EPUB = NewHTML(EPUBElmt, EPUBSchemes)
)

Functions

This section is empty.

Types

type HTML

// HTML is an HTML sanitizer whose rules are expressed as white-lists.
type HTML struct {
	// SafeElements is the white-list of allowed tags and attributes. It is
	// keyed by tag so that attributes are white-listed per tag.
	// An attribute entry can be an expression:
	// - *    : all attributes are allowed
	// - a=key: only the given key is accepted
	//   NOTE(review): presumably the name=value form, as in "rel=stylesheet";
	//   confirm against the implementation.
	SafeElements map[atom.Atom][]string

	// SafeSchemes is the white-list of allowed schemes in URLs.
	SafeSchemes []string
}

HTML represents an HTML sanitizer whose set of rules is based on a white-list approach.

func NewHTML

func NewHTML(safeElmt map[atom.Atom][]string, safeSchemes []string) *HTML

NewHTML creates a new HTML sanitizer with the given sets of allowed tags, attributes and schemes.

func (*HTML) Sanitize

func (s *HTML) Sanitize(w io.Writer, r io.Reader) error

Sanitize sanitizes the content read from the io.Reader and writes the result to the io.Writer.

func (*HTML) Scan

func (s *HTML) Scan(r io.Reader) error

Scan reports as an error any HTML tag or attribute read from the given io.Reader that the sanitizer considers unsafe.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL