document

package
v0.0.0-...-9d154b7 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 12, 2021 License: GPL-3.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

View Source
// Enumerated values of the unexported type mode, numbered with iota
// starting at 0 in declaration order.
// NOTE(review): presumably these are the tokenizer's scanning states; the
// mode type and its uses are not shown on this page — confirm in the source.
const (
	// NullMode is the first value (iota == 0), i.e. the zero value of mode.
	NullMode mode = iota
	// HTMLMode has value 1.
	HTMLMode
	// TextMode has value 2.
	TextMode
	// SkipMode has value 3.
	SkipMode
	// PuncMode has value 4.
	PuncMode
)

Variables

View Source
// HTMLTagSet is an exported package-level map from string to bool.
// The documentation renderer elided its 145 entries, so the literal below
// is not the real initializer.
// NOTE(review): presumably the keys are HTML tag names used for set
// membership tests — confirm against the source.
var HTMLTagSet = map[string]bool{}/* 145 elements not displayed */

HTMLTagSet ...

Functions

This section is empty.

Types

type CompressedToken

// CompressedToken embeds Token and declares its own Content field.
type CompressedToken struct {
	// Token is embedded, so its fields are promoted onto CompressedToken.
	Token
	// Content shadows the promoted Token.Content for Go field access and is
	// tagged `json:"-"`, so encoding/json skips this field itself.
	// NOTE(review): a "-" tag may not hide the embedded Token.Content from
	// JSON output; confirm the marshalled form is what is intended.
	Content string `json:"-"`
}

CompressedToken ...

type Document

// Document groups token, paragraph and title slices together with a raw
// string and a checksum string, all exported and JSON-tagged.
type Document struct {
	// Tokens is serialized under the JSON key "tokens".
	Tokens     []*Token     `json:"tokens"`
	// Paragraphs is serialized under the JSON key "paragraphs".
	Paragraphs []*Paragraph `json:"paragraphs"`
	// Titles is serialized under the singular JSON key "title".
	// NOTE(review): the key is singular while the field and the sibling keys
	// are plural; changing it would alter the wire format, so confirm intent.
	Titles     []*Title     `json:"title"`
	// Raw is serialized under the JSON key "raw".
	// NOTE(review): presumably the original input text — confirm.
	Raw        string       `json:"raw"`
	// Checksum is serialized under the JSON key "checksum".
	// NOTE(review): the algorithm and what it covers are not shown here.
	Checksum   string       `json:"checksum"`
}

Document ...

func New

func New(b []byte) (*Document, error)

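New takes a byte slice b and returns a *Document together with an error. This page gives no further description of its behavior.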

type Paragraph

// Paragraph is a struct whose only member is an embedded Part, so it
// exposes Part's fields directly.
type Paragraph struct {
	// Part is embedded; its fields are promoted onto Paragraph.
	Part
}

Paragraph ...

type Part

// Part holds two integer ranges: one named in bytes and one named in tokens.
// It is embedded by Paragraph and Title.
// NOTE(review): presumably the byte range indexes Document.Raw and the token
// range indexes Document.Tokens; whether the End values are inclusive is not
// visible here — confirm against the source.
type Part struct {
	// ByteStart is serialized as "byte_start".
	ByteStart  int `json:"byte_start"`
	// ByteEnd is serialized as "byte_end".
	ByteEnd    int `json:"byte_end"`
	// TokenStart is serialized as "token_start".
	TokenStart int `json:"token_start"`
	// TokenEnd is serialized as "token_end".
	TokenEnd   int `json:"token_end"`
}

Part ...

type Title

// Title is a struct whose only member is an embedded Part, so it exposes
// Part's fields directly.
type Title struct {
	// Part is embedded; its fields are promoted onto Title.
	Part
}

Title ...

type Token

// Token is a single tokenizer output unit: a string of content with integer
// start/end positions, an integer index and a TokenType.
// NOTE(review): presumably Start/End are offsets into the input and Idx is
// the token's position in the token sequence; units and inclusivity are not
// visible here — confirm against the source.
type Token struct {
	// Start is serialized as "start".
	Start   int       `json:"start"`
	// End is serialized as "end".
	End     int       `json:"end"`
	// Content is serialized as "content".
	Content string    `json:"content"`
	// Idx is serialized as "index" (the JSON key differs from the field name).
	Idx     int       `json:"index"`
	// Type is serialized as "type"; TokenType is a string type.
	Type    TokenType `json:"type"`
}

Token ...

type TokenType

// TokenType is a string-based type used for the Type field of Token.
type TokenType string

TokenType ...

// Declared TokenType values. Each is a plain string constant, so it appears
// verbatim wherever a TokenType is serialized as a string.
const (
	// TextToken is the TokenType "text".
	TextToken TokenType = "text"
	// HTMLToken is the TokenType "html".
	HTMLToken TokenType = "html"
	// SpecialToken is the TokenType "special".
	SpecialToken TokenType = "special"
	// PuncToken is the TokenType "punc".
	PuncToken TokenType = "punc"
)

type Tokenizer

// Tokenizer is the receiver type of the TokenReader method. Its fields are
// not shown on this page (they are unexported or filtered by the renderer).
type Tokenizer struct {
	// contains filtered or unexported fields
}

Tokenizer ...

func NewTokenizer

func NewTokenizer() *Tokenizer

NewTokenizer takes no arguments and returns a *Tokenizer.

func (*Tokenizer) TokenReader

func (t *Tokenizer) TokenReader(data *[]byte) <-chan *Token

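TokenReader takes a pointer to a byte slice and returns a receive-only channel of *Token. This page does not describe when, or whether, the channel is closed.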

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL