Documentation ¶
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
// DefaultAllowList is the allowlist used by the package-level helpers.
//
// Each Tags entry is a positional Tag literal: the tag name, the allowed
// plain attributes, and the allowed URL-related attributes, in that order.
// Every tag name appears exactly once; "main" is listed only with the
// sectioning tags.
var DefaultAllowList = &AllowList{
	Tags: []*Tag{
		// sectioning
		{"address", []string{}, []string{}},
		{"article", []string{}, []string{}},
		{"aside", []string{}, []string{}},
		{"footer", []string{}, []string{}},
		{"header", []string{}, []string{}},
		{"h1", []string{}, []string{}},
		{"h2", []string{}, []string{}},
		{"h3", []string{}, []string{}},
		{"h4", []string{}, []string{}},
		{"h5", []string{}, []string{}},
		{"h6", []string{}, []string{}},
		{"hgroup", []string{}, []string{}},
		{"main", []string{}, []string{}},
		{"nav", []string{}, []string{}},
		{"section", []string{}, []string{}},

		// block-level text content
		{"blockquote", []string{}, []string{"cite"}},
		{"dd", []string{}, []string{}},
		{"div", []string{}, []string{}},
		{"dl", []string{}, []string{}},
		{"dt", []string{}, []string{}},
		{"figcaption", []string{}, []string{}},
		{"figure", []string{}, []string{}},
		{"hr", []string{}, []string{}},
		{"li", []string{}, []string{}},
		{"ol", []string{}, []string{}},
		{"p", []string{}, []string{}},
		{"pre", []string{}, []string{}},
		{"ul", []string{}, []string{}},

		// inline text
		{"a", []string{"rel", "target", "referrerpolicy"}, []string{"href"}},
		{"abbr", []string{"title"}, []string{}},
		{"b", []string{}, []string{}},
		{"bdi", []string{}, []string{}},
		{"bdo", []string{}, []string{}},
		{"br", []string{}, []string{}},
		{"cite", []string{}, []string{}},
		{"code", []string{}, []string{}},
		{"data", []string{"value"}, []string{}},
		{"em", []string{}, []string{}},
		{"i", []string{}, []string{}},
		{"kbd", []string{}, []string{}},
		{"mark", []string{}, []string{}},
		{"q", []string{}, []string{"cite"}},
		{"s", []string{}, []string{}},
		{"small", []string{}, []string{}},
		{"span", []string{}, []string{}},
		{"strong", []string{}, []string{}},
		{"sub", []string{}, []string{}},
		{"sup", []string{}, []string{}},
		{"time", []string{"datetime"}, []string{}},
		{"u", []string{}, []string{}},

		// images and multimedia
		{"area", []string{"alt", "coords", "shape", "target", "rel", "referrerpolicy"}, []string{"href"}},
		{"audio", []string{"autoplay", "controls", "crossorigin", "duration", "loop", "muted", "preload"}, []string{"src"}},
		{"img", []string{"alt", "crossorigin", "height", "width", "loading", "referrerpolicy"}, []string{"src"}},
		{"map", []string{"name"}, []string{}},
		{"track", []string{"default", "kind", "label", "srclang"}, []string{"src"}},
		{"video", []string{"autoplay", "buffered", "controls", "crossorigin", "duration", "loop", "muted", "preload", "height", "width"}, []string{"src", "poster"}},

		// embedded content
		{"picture", []string{}, []string{}},
		{"source", []string{"type"}, []string{"src"}},

		// edits
		{"del", []string{}, []string{}},
		{"ins", []string{}, []string{}},

		// tables
		{"caption", []string{}, []string{}},
		{"col", []string{"span"}, []string{}},
		{"colgroup", []string{}, []string{}},
		{"table", []string{}, []string{}},
		{"tbody", []string{}, []string{}},
		{"td", []string{"colspan", "rowspan"}, []string{}},
		{"tfoot", []string{}, []string{}},
		{"th", []string{"colspan", "rowspan", "scope"}, []string{}},
		{"thead", []string{}, []string{}},
		{"tr", []string{}, []string{}},

		// interactive elements
		{"details", []string{"open"}, []string{}},
		{"summary", []string{}, []string{}},
	},

	// Attributes accepted on every allowed tag.
	GlobalAttr: []string{
		"class",
		"id",
	},

	// Tags whose content is not HTML markup.
	NonHTMLTags: []*Tag{
		{Name: "script"},
		{Name: "style"},
		{Name: "object"},
	},
}
DefaultAllowList is the default allowlist for the HTML filter.
The allowlist contains most tags listed in https://developer.mozilla.org/en-US/docs/Web/HTML/Element . Modifying the default list directly is not recommended; use .Clone() and modify the copy instead.
Functions ¶
func DefaultURLSanitizer ¶
DefaultURLSanitizer is a default and strict sanitizer. It only accepts
- URL with scheme http or https
- relative URL, such as abc, abc?xxx=1, abc#123
- absolute URL, such as /abc, /abc?xxx=1, /abc#123
func NewWriter ¶
NewWriter returns a new Writer, with DefaultAllowList, writing sanitized HTML content to w.
Example ¶
// Example for NewWriter: streams HTML through a sanitizing writer and
// compares the result with the expected output.
package main

import (
	"bytes"
	"fmt"
	"io"
	"strings"

	"github.com/sym01/htmlsanitizer"
)

func main() {
	// demo data
	// Each repetition starts with "abc-->" and ends with an unterminated
	// "<!--", so consecutive repetitions form an HTML comment that spans
	// the boundary between them.
	data := strings.Repeat(`abc--> <a href="javascript:alert(1)">link1</a> <a href=http://example.com>link2<script>xxx</script></a> <!--`, 1024)
	// Expected result: only the first "abc-->" remains, the javascript: href
	// and the <script> element are gone, and the unquoted href is quoted.
	expected := "abc-->" + strings.Repeat(` <a>link1</a> <a href="http://example.com">link2</a> `, 1024)

	// underlying writer for demo
	o := new(bytes.Buffer)
	// source reader for demo
	r := bytes.NewBufferString(data)

	// Everything written to sanitizedWriter is sanitized before reaching o.
	sanitizedWriter := htmlsanitizer.NewWriter(o)
	// The byte count and error from io.Copy are deliberately discarded in this demo.
	_, _ = io.Copy(sanitizedWriter, r)

	// check the result, for demo only
	fmt.Print(o.String() == expected)
}
Output: true
func SanitizeString ¶
SanitizeString uses the DefaultAllowList to sanitize the HTML string.
Types ¶
type AllowList ¶ added in v1.0.1
// AllowList specifies the HTML tags and attributes accepted by the filter.
type AllowList struct {
	// Tags specifies all the allowed tags.
	Tags []*Tag

	// GlobalAttr specifies the attributes allowed on every tag.
	// It's very useful for some common attributes, such as `class`, `id`.
	// For security reasons, it's not recommended to set a global attr for
	// any URL-related attribute.
	GlobalAttr []string

	// NonHTMLTags defines a set of special tags, such as <script> and <style>.
	// The content of these kinds of tags is not real HTML content,
	// so each one is treated as a single element, without any child elements.
	// TODO: rename this one
	NonHTMLTags []*Tag
}
AllowList specifies all the allowed HTML tags and their attributes for the filter.
func (*AllowList) FindTag ¶ added in v1.0.1
FindTag finds and returns the tag with the given name; the lookup is case-insensitive.
func (*AllowList) RemoveTag ¶ added in v1.0.1
RemoveTag removes all tags named `name`, which must be lowercase. Modifying the default list directly is not recommended; use .Clone() and modify the copy instead.
Example ¶
// Example for RemoveTag: drops <a> from a sanitizer's allowlist and shows
// the effect on a small document.
package main

import (
	"fmt"

	"github.com/sym01/htmlsanitizer"
)

func main() {
	// sometimes we don't want user to pass HTML with <a> tag
	sanitizer := htmlsanitizer.NewHTMLSanitizer()
	// RemoveTag is reached through the embedded AllowList of this sanitizer.
	sanitizer.RemoveTag("a")

	// The input uses a mixed-case attribute name (ClaSs) and contains an
	// <a> element, which is no longer allowed.
	data := ` <h1 ClaSs="h1">hello</h1> <p> Hello, world<br> Welcome to use <a href="https://github.com/sym01/htmlsanitizer">htmlsanitizer</a> </p>`

	// The error result is ignored in this demo.
	output, _ := sanitizer.SanitizeString(data)
	fmt.Print(output)
}
Output: <h1 class="h1">hello</h1> <p> Hello, world<br> Welcome to use htmlsanitizer </p>
type HTMLSanitizer ¶
// HTMLSanitizer combines an allowlist with a URL sanitizer callback.
type HTMLSanitizer struct {
	// AllowList is embedded, so its fields and methods are available
	// directly on the sanitizer.
	*AllowList

	// URLSanitizer is a func used to sanitize all the URLAttr.
	// URLSanitizer returns a sanitized URL and a bool var indicating
	// whether the current attribute is acceptable. If not acceptable,
	// the current attribute will be ignored.
	// If the func is nil, then DefaultURLSanitizer will be used.
	URLSanitizer func(rawURL string) (sanitzed string, ok bool)
}
HTMLSanitizer is a super fast HTML sanitizer for arbitrary HTML content. It is an allowlist-based sanitizer whose time complexity is O(n).
Example (CustomURLSanitizer) ¶
package main import ( "fmt" "net/url" "github.com/sym01/htmlsanitizer" ) func main() { // only links with domain name example.com are allowed. sanitizer := htmlsanitizer.NewHTMLSanitizer() sanitizer.URLSanitizer = func(rawURL string) (newURL string, ok bool) { newURL, ok = htmlsanitizer.DefaultURLSanitizer(rawURL) if !ok { return } u, err := url.Parse(newURL) if err != nil { ok = false return } if u.Host == "example.com" { ok = true return } ok = false return } data := ` <a href="http://others.com">Link</a> <a href="https://example.com/xxx">Link with example.com</a> ` output, _ := sanitizer.SanitizeString(data) fmt.Print(output) }
Output: <a>Link</a> <a href="https://example.com/xxx">Link with example.com</a>
Example (KeepStyleSheet) ¶
// Example for keeping a style sheet: extends the allowlist with <style>,
// <head>, <body> and <html> and sanitizes a complete document.
package main

import (
	"fmt"

	"github.com/sym01/htmlsanitizer"
)

func main() {
	sanitizer := htmlsanitizer.NewHTMLSanitizer()
	// The added tags carry a name only, so no tag-specific attributes are
	// allowed on them.
	sanitizer.AllowList.Tags = append(sanitizer.AllowList.Tags,
		&htmlsanitizer.Tag{Name: "style"},
		&htmlsanitizer.Tag{Name: "head"},
		&htmlsanitizer.Tag{Name: "body"},
		&htmlsanitizer.Tag{Name: "html"},
	)

	// The style sheet below contains text that looks like <body> tags
	// (including variants with extra spaces) inside a CSS declaration.
	data := `<!doctype html> <html> <head> <style type="text/css"> body { background-color: #f0f0f2; margin: 0; padding: 0; bad-attr: <body></body>; bad-attr: <body></body >; bad-attr: <body></ body>; font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; } </style> </head> <body> <div> <h1>Example Domain</h1> <p><a href="https://www.iana.org/domains/example">More information...</a></p> </div> </body> </html>`

	// The error result is ignored in this demo.
	output, _ := sanitizer.SanitizeString(data)
	fmt.Print(output)
}
Output: <html> <head> <style> body { background-color: #f0f0f2; margin: 0; padding: 0; bad-attr: <body></body>; bad-attr: <body></body >; bad-attr: <body></ body>; font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; } </style> </head> <body> <div> <h1>Example Domain</h1> <p><a href="https://www.iana.org/domains/example">More information...</a></p> </div> </body> </html>
Example (NoTagsAllowed) ¶
// Example for disallowing every tag: a sanitizer without an allowlist.
package main

import (
	"fmt"

	"github.com/sym01/htmlsanitizer"
)

func main() {
	sanitizer := htmlsanitizer.NewHTMLSanitizer()
	// just set AllowList to nil to disable all tags
	sanitizer.AllowList = nil

	// of course nothing will happen here
	// (the call goes to the embedded AllowList, which is now nil)
	sanitizer.RemoveTag("a")

	data := ` <a href="http://others.com">Link</a> <a href="https://example.com/xxx">Link with example.com</a> `

	// The error result is ignored in this demo.
	output, _ := sanitizer.SanitizeString(data)
	fmt.Print(output)
}
Output: Link Link with example.com
Example (OnlyAllowHrefTag) ¶
// Example for a minimal allowlist: only <a> with its href attribute.
package main

import (
	"fmt"

	"github.com/sym01/htmlsanitizer"
)

func main() {
	sanitizer := htmlsanitizer.NewHTMLSanitizer()
	// Replace the tag list with a single entry. The positional fields are
	// the tag name, the plain attributes (none) and the URL attributes.
	sanitizer.AllowList.Tags = []*htmlsanitizer.Tag{
		{"a", nil, []string{"href"}},
	}

	// The input includes a <details> element with an event-handler
	// attribute and an <a> carrying a target attribute; neither is in the
	// tag list above.
	data := ` <details/open/ontoggle=alert(1)></details> <a href="http://others.com" target="_blank">Link</a> <a href="https://example.com/xxx">Link with example.com</a> `

	// The error result is ignored in this demo.
	output, _ := sanitizer.SanitizeString(data)
	fmt.Print(output)
}
Output: <a href="http://others.com">Link</a> <a href="https://example.com/xxx">Link with example.com</a>
func NewHTMLSanitizer ¶
func NewHTMLSanitizer() *HTMLSanitizer
NewHTMLSanitizer creates a new HTMLSanitizer with the clone of the DefaultAllowList.
func (*HTMLSanitizer) NewWriter ¶
func (f *HTMLSanitizer) NewWriter(w io.Writer) io.Writer
NewWriter returns a new Writer writing sanitized HTML content to w.
func (*HTMLSanitizer) Sanitize ¶
func (f *HTMLSanitizer) Sanitize(data []byte) ([]byte, error)
Sanitize sanitizes the HTML data and returns the sanitized HTML.
func (*HTMLSanitizer) SanitizeString ¶
func (f *HTMLSanitizer) SanitizeString(data string) (string, error)
SanitizeString sanitizes the HTML string and returns the sanitized HTML.
type Tag ¶
// Tag describes one allowed tag together with its allowed attributes.
type Tag struct {
	// Name for current tag, must be lowercase.
	Name string

	// Attr specifies the allowed attributes for current tag,
	// must be lowercase.
	//
	// e.g. colspan, rowspan
	Attr []string

	// URLAttr specifies the allowed, URL-related attributes for current tag,
	// must be lowercase.
	//
	// e.g. src, href
	URLAttr []string
}
Tag with its attributes.