Documentation
¶
Overview ¶
Package convert extends Go's x/text/encoding capability to convert legacy encoded text to a modern UTF-8 encoding.
Example (Swap) ¶
fmt.Print(string(swap(DEL))) fmt.Print(string(swap(SquareRoot)))
Output: Δ✓
Index ¶
- Constants
- Variables
- func AliasFmt(alias, value string, e encoding.Encoding) (string, error)
- func AsaX34(e encoding.Encoding) string
- func BOM() []byte
- func D437(s string) ([]byte, error)
- func DString(s string, c *charmap.Charmap) ([]byte, error)
- func E437(s string) ([]byte, error)
- func EString(s string, c *charmap.Charmap) ([]byte, error)
- func Encoder(name string) (encoding.Encoding, error)
- func Encodings() []encoding.Encoding
- func HexDecode(s string) ([]byte, error)
- func HexEncode(s string) []byte
- func Humanize(name string) string
- func ISO11Name(name string) bool
- func List() *bytes.Buffer
- func MakeBytes() []byte
- func Mark(b []byte) []byte
- func Numeric(name string) int
- func Table(name string) (*bytes.Buffer, error)
- func TrimEOF(b []byte) []byte
- func Uniform(mime string) string
- type Cell
- type Convert
- func (c *Convert) ANSI(b ...byte) ([]rune, error)
- func (c *Convert) ANSIControls() *Convert
- func (c *Convert) Chars(b ...byte) ([]rune, error)
- func (c *Convert) Dump(b ...byte) ([]rune, error)
- func (c *Convert) LineBreaks()
- func (c *Convert) RunesControls()
- func (c *Convert) RunesControlsEBCDIC()
- func (c *Convert) RunesDOS()
- func (c *Convert) RunesEBCDIC()
- func (c *Convert) RunesKOI8()
- func (c *Convert) RunesLatin()
- func (c *Convert) RunesMacintosh()
- func (c *Convert) RunesShiftJIS()
- func (c *Convert) RunesUTF8()
- func (c *Convert) RunesWindows()
- func (c *Convert) Swap() *Convert
- func (c *Convert) Text(b ...byte) ([]rune, error)
- func (c *Convert) Transform() error
- type Encoding
- type Flag
- type In
Examples ¶
Constants ¶
const ( DosSUB = 8594 SymbolSUB = 9242 )
const ( // NUL Null control code. NUL = iota // SOH Start of heading. SOH // STX Start of text. STX // ETX End of text. ETX // EOT End of transmission. EOT // ENQ Enquiry. ENQ // ACK Acknowledge. ACK // BEL Bell or alert. BEL // BS Backspace. BS // HT Horizontal tabulation. HT // LF Line feed. LF // VT Vertical tabulation. VT // FF Form feed. FF // CR Carriage return. CR // SO Shift out. SO // SI Shift in. SI // DLE Data Link Escape. DLE // DC1 Device control one. DC1 // DC2 Device control two. DC2 // DC3 Device control three. DC3 // DC4 Device control four. DC4 // NAK Negative acknowledge. NAK // SYN Synchronous idle. SYN // ETB End of transmission block. ETB // CAN Cancel. CAN // EM End of medium. EM // SUB Substitute. SUB // ESC Escape. ESC // FS File separator. FS // GS Group separator. GS // RS Record separator. RS // US Unit separator. US // SP Space. SP )
const ( // LeftSquareBracket [. LeftSquareBracket = 91 // VerticalBar |. VerticalBar = 124 // DEL Delete. DEL = 127 // Dash Hyphen -. Dash = 150 // Nbsp Non-breaking space. Nbsp = 160 // InvertedExclamation ¡. InvertedExclamation = 161 // Cent ¢. Cent = 162 // BrokenBar ¦. BrokenBar = 166 // Negation ¬. Negation = 172 // PlusMinus ±. PlusMinus = 177 // LightVertical light vertical │. LightVertical = 179 // SquareRoot Square root √. SquareRoot = 251 // NBSP Non-breaking space. NBSP = 255 // Delta Δ. Delta = 916 // LeftwardsArrow ←. LeftwardsArrow = 8592 // SquareRootU Unicode square root √. SquareRootU = 8730 // House ⌂. House = 8962 // IntegralExtension ⎮. IntegralExtension = 9134 // SymbolNUL ␀. SymbolNUL = 9216 // SymbolESC ␛. SymbolESC = 9243 // SymbolDEL ␡. SymbolDEL = 9249 // LightVerticalU Box drawing light vertical │. LightVerticalU = 9474 // CheckMark ✓. CheckMark = 10003 // Replacement character �. Replacement = 65533 // Open Box ␣. OpenBox = 9251 )
Variables ¶
var ( // AsaX34_1963 ASA X3.4 1963. AsaX34_1963 encoding.Encoding = &x34_1963 // nolint: gochecknoglobals // AsaX34_1965 ASA X3.4 1965. AsaX34_1965 encoding.Encoding = &x34_1965 // nolint: gochecknoglobals // AnsiX34_1967 ANSI X3.4 1967/77/86. AnsiX34_1967 encoding.Encoding = &x34_1967 // nolint: gochecknoglobals )
var ( ErrChainANSI = errors.New("ansi() is a chain method that is to be used" + " in conjunction with swap: c.swap().ansi()") ErrChainWrap = errors.New("wrapWidth() is a chain method that is to be" + " used in conjunction with swap: c.swap().wrapWidth()") ErrBytes = errors.New("cannot transform an empty byte slice") ErrEncoding = errors.New("no encoding provided") ErrName = errors.New("encoding cannot match name or alias") ErrUTF8 = errors.New("string cannot encode to utf-8") ErrUTF16 = errors.New("utf-16 table encodings are not supported") ErrUTF32 = errors.New("utf-32 table encodings are not supported") ErrWidth = errors.New("cannot determine the number columns from using line break") )
var (
ErrNilEncoding = errors.New("character encoding cannot be a nil value")
)
var (
ErrNoName = errors.New("there is no encoding name")
)
Functions ¶
func AsaX34 ¶ added in v0.0.38
AsaX34 returns a named value for the legacy ASA ASCII character encodings.
func BOM ¶
func BOM() []byte
BOM is the UTF-8 byte order mark prefix.
Example ¶
fmt.Printf("%X", BOM())
Output: EFBBBF
func D437 ¶
D437 decodes IBM Code Page 437 encoded text.
Example ¶
const name = base + "cp437In.txt" result, err := D437(cp437hex) if err != nil { log.Fatal(err) } _, err = filesystem.SaveTemp(name, result...) if err != nil { log.Fatal(err) } t, err := filesystem.ReadText(name) if err != nil { log.Fatal(err) } fmt.Print(t)
Output: ═╣▓╠═
func E437 ¶
E437 encodes text into IBM Code Page 437.
Example ¶
const name = base + "cp437.txt" result, err := E437(utf) if err != nil { log.Fatal(err) } _, err = filesystem.SaveTemp(name, result...) if err != nil { log.Fatal(err) } t, err := filesystem.ReadText(name) if err != nil { log.Fatal(err) } filesystem.Clean(name) fmt.Print(len(t))
Output: 8
func MakeBytes ¶
func MakeBytes() []byte
MakeBytes generates a 256 character or 8-bit container ready to hold legacy code point values.
func Numeric ¶ added in v0.0.38
Numeric returns a numeric alias for a character encoding. A -1 int is returned whenever an alias could not be generated. Unicode based encodings always return -1.
Types ¶
type Convert ¶ added in v0.0.31
type Convert struct { Flags Flag // Commandline supplied flag values. Input In // Input text for transformation. Output []rune // Transformed UTF-8 runes. // contains filtered or unexported fields }
Convert 8-bit legacy or other Unicode text to UTF-8.
func (*Convert) ANSI ¶ added in v0.0.31
ANSI transforms legacy encoded ANSI into modern UTF-8 text. It displays ASCII control codes as characters. It obeys the DOS end of file marker.
func (*Convert) ANSIControls ¶ added in v0.0.31
ANSIControls replaces all ←[ and ␛[ character matches with functional ANSI escape controls.
func (*Convert) Chars ¶ added in v0.0.31
Chars transforms legacy encoded characters and text control codes into UTF-8 characters. It displays both ASCII and ANSI control codes as characters. It ignores the DOS end of file marker.
func (*Convert) Dump ¶ added in v0.0.31
Dump transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It ignores the DOS end of file marker.
func (*Convert) LineBreaks ¶ added in v0.0.31
func (c *Convert) LineBreaks()
LineBreaks will try to guess the line break representation as a 2 byte value. A guess of Unix will return [10, 0], Windows [13, 10], otherwise a [0, 0] value is returned.
func (*Convert) RunesControls ¶ added in v0.0.31
func (c *Convert) RunesControls()
RunesControls switches out C0 and C1 ASCII controls with Unicode Control Picture representations.
func (*Convert) RunesControlsEBCDIC ¶ added in v0.0.31
func (c *Convert) RunesControlsEBCDIC()
RunesControlsEBCDIC switches out EBCDIC controls with Unicode Control Picture representations.
func (*Convert) RunesDOS ¶ added in v0.0.31
func (c *Convert) RunesDOS()
RunesDOS switches out C0, C1 and other controls with PC/MS-DOS picture glyphs.
func (*Convert) RunesEBCDIC ¶ added in v0.0.31
func (c *Convert) RunesEBCDIC()
RunesEBCDIC switches out EBCDIC IBM mainframe controls with Unicode picture representations. Where no appropriate picture exists, a space placeholder is used.
func (*Convert) RunesKOI8 ¶ added in v0.0.31
func (c *Convert) RunesKOI8()
RunesKOI8 blanks out unused C0, C1 and other controls spaces for Russian sets.
func (*Convert) RunesLatin ¶ added in v0.0.31
func (c *Convert) RunesLatin()
RunesLatin blanks out unused C0, C1 and other controls spaces for ISO Latin sets.
func (*Convert) RunesMacintosh ¶ added in v0.0.31
func (c *Convert) RunesMacintosh()
RunesMacintosh replaces specific Mac OS Roman characters with Unicode picture representations.
func (*Convert) RunesShiftJIS ¶ added in v0.0.31
func (c *Convert) RunesShiftJIS()
RunesShiftJIS tweaks some Unicode picture representations for Shift-JIS.
func (*Convert) RunesUTF8 ¶ added in v0.0.31
func (c *Convert) RunesUTF8()
RunesUTF8 tweaks some Unicode picture representations for UTF-8 Basic Latin.
func (*Convert) RunesWindows ¶ added in v0.0.31
func (c *Convert) RunesWindows()
RunesWindows tweaks some Unicode picture representations for Windows-125x sets.
func (*Convert) Swap ¶ added in v0.0.31
Swap transforms character map and control codes into UTF-8 Unicode runes.
type Encoding ¶
Encoding is an implementation of the Encoding interface that adds the String and ID methods to an existing encoding.