Back to

Package text

Latest Go to latest
Published: Aug 27, 2019 | License: MIT | Module:


Package text is a Go library for working with text.

The calculation of string width conforms to Unicode Standard Annex #11 (East Asian Width).


Package Files


const (
	UTF8BOM    = "\xef\xbb\xbf"
	UTF16BEBOM = "\xfe\xff"
	UTF16LEBOM = "\xff\xfe"
const (
	SurrogatePairHighBegin = "\xd8\x00"
	SurrogatePairHighEnd   = "\xdb\xff"
	SurrogatePairLowBegin  = "\xdc\x00"
	SurrogatePairLowEnd    = "\xdf\xff"


var AmbiguousTable = &unicode.RangeTable{
	R16: []unicode.Range16{ /* 189 elements not displayed */

	R32: []unicode.Range32{
		{Lo: 0x1f100, Hi: 0x1f10a, Stride: 1},
		{Lo: 0x1f110, Hi: 0x1f12d, Stride: 1},
		{Lo: 0x1f130, Hi: 0x1f169, Stride: 1},
		{Lo: 0x1f170, Hi: 0x1f18d, Stride: 1},
		{Lo: 0x1f18f, Hi: 0x1f190, Stride: 1},
		{Lo: 0x1f19b, Hi: 0x1f1ac, Stride: 1},
		{Lo: 0xe0100, Hi: 0xe01ef, Stride: 1},
		{Lo: 0xf0000, Hi: 0xffffd, Stride: 1},
		{Lo: 0x100000, Hi: 0x10fffd, Stride: 1},
var DiacriticalSignTable = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x0300, Hi: 0x036f, Stride: 1},
		{Lo: 0x0591, Hi: 0x05af, Stride: 1},
		{Lo: 0x05b0, Hi: 0x05bd, Stride: 1},
		{Lo: 0x05bf, Hi: 0x05bf, Stride: 1},
		{Lo: 0x05c1, Hi: 0x05c2, Stride: 1},
		{Lo: 0x05c4, Hi: 0x05c5, Stride: 1},
		{Lo: 0x05c7, Hi: 0x05c7, Stride: 1},
		{Lo: 0x064b, Hi: 0x0652, Stride: 1},
		{Lo: 0x0653, Hi: 0x065f, Stride: 1},
		{Lo: 0x0670, Hi: 0x0670, Stride: 1},
		{Lo: 0x08a0, Hi: 0x08ff, Stride: 1},
		{Lo: 0x1ab0, Hi: 0x1aff, Stride: 1},
		{Lo: 0x1dc0, Hi: 0x1dff, Stride: 1},
		{Lo: 0x20d0, Hi: 0x20ff, Stride: 1},
		{Lo: 0xfbb2, Hi: 0xfbc1, Stride: 1},
		{Lo: 0xfe20, Hi: 0xfe2f, Stride: 1},
var EncodingLiteral = map[Encoding]string{
	AUTO:     "AUTO",
	UTF8:     "UTF8",
	UTF8M:    "UTF8M",
	UTF16:    "UTF16",
	UTF16BE:  "UTF16BE",
	UTF16LE:  "UTF16LE",
	SJIS:     "SJIS",
var ErrInvalidEncoding = errors.New("invalid character encoding")
var ErrUnknownEncoding = errors.New("cannot detect character encoding")
var FormatCharTable = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x034f, Hi: 0x034f, Stride: 1},
		{Lo: 0x200c, Hi: 0x200f, Stride: 1},
		{Lo: 0x2028, Hi: 0x202e, Stride: 1},
		{Lo: 0x2061, Hi: 0x2069, Stride: 1},
var FullWidthTable = &unicode.RangeTable{
	R16: []unicode.Range16{ /* 200 elements not displayed */

	R32: []unicode.Range32{
		{Lo: 0x16fe0, Hi: 0x16fe1, Stride: 1},
		{Lo: 0x17000, Hi: 0x187f1, Stride: 1},
		{Lo: 0x18800, Hi: 0x18af2, Stride: 1},
		{Lo: 0x1b000, Hi: 0x1b0ff, Stride: 1},
		{Lo: 0x1b100, Hi: 0x1b11e, Stride: 1},
		{Lo: 0x1b170, Hi: 0x1b2fb, Stride: 1},
		{Lo: 0x1f004, Hi: 0x1f004, Stride: 1},
		{Lo: 0x1f0cf, Hi: 0x1f0cf, Stride: 1},
		{Lo: 0x1f18e, Hi: 0x1f18e, Stride: 1},
		{Lo: 0x1f191, Hi: 0x1f19a, Stride: 1},
		{Lo: 0x1f200, Hi: 0x1f202, Stride: 1},
		{Lo: 0x1f210, Hi: 0x1f23b, Stride: 1},
		{Lo: 0x1f240, Hi: 0x1f248, Stride: 1},
		{Lo: 0x1f250, Hi: 0x1f251, Stride: 1},
		{Lo: 0x1f260, Hi: 0x1f265, Stride: 1},
		{Lo: 0x1f300, Hi: 0x1f320, Stride: 1},
		{Lo: 0x1f32d, Hi: 0x1f335, Stride: 1},
		{Lo: 0x1f337, Hi: 0x1f37c, Stride: 1},
		{Lo: 0x1f37e, Hi: 0x1f393, Stride: 1},
		{Lo: 0x1f3a0, Hi: 0x1f3ca, Stride: 1},
		{Lo: 0x1f3cf, Hi: 0x1f3d3, Stride: 1},
		{Lo: 0x1f3e0, Hi: 0x1f3f0, Stride: 1},
		{Lo: 0x1f3f4, Hi: 0x1f3f4, Stride: 1},
		{Lo: 0x1f3f8, Hi: 0x1f3fa, Stride: 1},
		{Lo: 0x1f3fb, Hi: 0x1f3ff, Stride: 1},
		{Lo: 0x1f400, Hi: 0x1f43e, Stride: 1},
		{Lo: 0x1f440, Hi: 0x1f440, Stride: 1},
		{Lo: 0x1f442, Hi: 0x1f4fc, Stride: 1},
		{Lo: 0x1f4ff, Hi: 0x1f53d, Stride: 1},
		{Lo: 0x1f54b, Hi: 0x1f54e, Stride: 1},
		{Lo: 0x1f550, Hi: 0x1f567, Stride: 1},
		{Lo: 0x1f57a, Hi: 0x1f57a, Stride: 1},
		{Lo: 0x1f595, Hi: 0x1f596, Stride: 1},
		{Lo: 0x1f5a4, Hi: 0x1f5a4, Stride: 1},
		{Lo: 0x1f5fb, Hi: 0x1f5ff, Stride: 1},
		{Lo: 0x1f600, Hi: 0x1f64f, Stride: 1},
		{Lo: 0x1f680, Hi: 0x1f6c5, Stride: 1},
		{Lo: 0x1f6cc, Hi: 0x1f6cc, Stride: 1},
		{Lo: 0x1f6d0, Hi: 0x1f6d2, Stride: 1},
		{Lo: 0x1f6eb, Hi: 0x1f6ec, Stride: 1},
		{Lo: 0x1f6f4, Hi: 0x1f6f9, Stride: 1},
		{Lo: 0x1f910, Hi: 0x1f93e, Stride: 1},
		{Lo: 0x1f940, Hi: 0x1f970, Stride: 1},
		{Lo: 0x1f973, Hi: 0x1f976, Stride: 1},
		{Lo: 0x1f97a, Hi: 0x1f97a, Stride: 1},
		{Lo: 0x1f97c, Hi: 0x1f9a2, Stride: 1},
		{Lo: 0x1f9b0, Hi: 0x1f9b9, Stride: 1},
		{Lo: 0x1f9c0, Hi: 0x1f9c2, Stride: 1},
		{Lo: 0x1f9d0, Hi: 0x1f9ff, Stride: 1},
		{Lo: 0x20000, Hi: 0x2a6d6, Stride: 1},
		{Lo: 0x2a6d7, Hi: 0x2a6ff, Stride: 1},
		{Lo: 0x2a700, Hi: 0x2b734, Stride: 1},
		{Lo: 0x2b735, Hi: 0x2b73f, Stride: 1},
		{Lo: 0x2b740, Hi: 0x2b81d, Stride: 1},
		{Lo: 0x2b81e, Hi: 0x2b81f, Stride: 1},
		{Lo: 0x2b820, Hi: 0x2cea1, Stride: 1},
		{Lo: 0x2cea2, Hi: 0x2ceaf, Stride: 1},
		{Lo: 0x2ceb0, Hi: 0x2ebe0, Stride: 1},
		{Lo: 0x2ebe1, Hi: 0x2f7ff, Stride: 1},
		{Lo: 0x2f800, Hi: 0x2fa1d, Stride: 1},
		{Lo: 0x2fa1e, Hi: 0x2fa1f, Stride: 1},
		{Lo: 0x2fa20, Hi: 0x2fffd, Stride: 1},
		{Lo: 0x30000, Hi: 0x3fffd, Stride: 1},
var LineBreakLiteral = map[LineBreak]string{
	CR:   "CR",
	LF:   "LF",
var RightToLeftTable = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x0590, Hi: 0x05ff, Stride: 1},
		{Lo: 0x0600, Hi: 0x06ff, Stride: 1},
		{Lo: 0x0700, Hi: 0x074f, Stride: 1},
		{Lo: 0x0750, Hi: 0x077f, Stride: 1},
		{Lo: 0x0860, Hi: 0x086f, Stride: 1},
		{Lo: 0x08a0, Hi: 0x08ff, Stride: 1},
		{Lo: 0xfb50, Hi: 0xfdff, Stride: 1},
		{Lo: 0xfe70, Hi: 0xfeff, Stride: 1},
	R32: []unicode.Range32{
		{Lo: 0x10ac0, Hi: 0x10aff, Stride: 1},
		{Lo: 0x10c00, Hi: 0x10c4f, Stride: 1},
		{Lo: 0x10f00, Hi: 0x10f2f, Stride: 1},
		{Lo: 0x10f30, Hi: 0x10f6f, Stride: 1},
		{Lo: 0x1ee00, Hi: 0x1eeff, Stride: 1},
var SJISSingleByteTable = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x0020, Hi: 0x007e, Stride: 1},
		{Lo: 0xff61, Hi: 0xff9f, Stride: 1},
var ZeroWidthSpaceTable = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x200b, Hi: 0x200b, Stride: 1},
		{Lo: 0x2060, Hi: 0x2063, Stride: 1},
		{Lo: 0xfeff, Hi: 0xfeff, Stride: 1},

func ByteSize

func ByteSize(s string, encoding Encoding) int

Calculates the byte size of a string.

func Decode

func Decode(src []byte, enc Encoding) ([]byte, error)

Decode a string from any encoding to UTF-8.

func Encode

func Encode(src []byte, enc Encoding) ([]byte, error)

Encode a string from UTF-8 to another encoding.

func GetTransformDecoder

func GetTransformDecoder(r io.Reader, enc Encoding) (io.Reader, error)

Get a reader to transform character encoding from any encoding to UTF-8.

func GetTransformEncoder

func GetTransformEncoder(r io.Reader, enc Encoding) (io.Reader, error)

Get a reader to transform character encoding from UTF-8 to another encoding.

func GetTransformWriter

func GetTransformWriter(w io.Writer, enc Encoding) (io.Writer, error)

Get a writer to transform character encoding from UTF-8 to another encoding.

func IsHighSurrogate

func IsHighSurrogate(r rune) bool

func IsLowSurrogate

func IsLowSurrogate(r rune) bool

func IsRightToLeftLetters

func IsRightToLeftLetters(s string) bool

Reports whether a string consists of right-to-left horizontal writing characters.

func NewUTF8MEncoder

func NewUTF8MEncoder() *encoding.Encoder

func RuneByteSize

func RuneByteSize(r rune, encoding Encoding) int

Calculates the byte size of a character.

func RuneWidth

func RuneWidth(r rune, eastAsianEncoding bool, countDiacriticalSign bool, countFormatCode bool) int

Calculates the display width of a character.

func Width

func Width(s string, eastAsianEncoding bool, countDiacriticalSign bool, countFormatCode bool) int

Calculates the display width of a string.

type Encoding

type Encoding uint8
const (
	AUTO Encoding = iota

func DetectEncoding

func DetectEncoding(r io.ReadSeeker) (detected Encoding, err error)

Detects the character encoding.

func DetectInSpecifiedEncoding

func DetectInSpecifiedEncoding(r io.ReadSeeker, enc Encoding) (detected Encoding, err error)

func InferEncoding

func InferEncoding(b []byte, eof bool) (Encoding, error)

func ParseEncoding

func ParseEncoding(s string) (Encoding, error)

func (Encoding) String

func (e Encoding) String() string

type FieldAlignment

type FieldAlignment int
const (
	NotAligned FieldAlignment = iota

type LineBreak

type LineBreak string
const (
	CR   LineBreak = "\r"
	LF   LineBreak = "\n"
	CRLF LineBreak = "\r\n"

func ParseLineBreak

func ParseLineBreak(s string) (LineBreak, error)

func (LineBreak) String

func (lb LineBreak) String() string

func (LineBreak) Value

func (lb LineBreak) Value() string

type RawText

type RawText []byte

type UTF8MEncoder

type UTF8MEncoder struct {
	// contains filtered or unexported fields

func (*UTF8MEncoder) Reset

func (u *UTF8MEncoder) Reset()

func (*UTF8MEncoder) Transform

func (u *UTF8MEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
Documentation was rendered with GOOS=linux and GOARCH=amd64.

Jump to identifier

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to identifier