convert

package

v0.0.38 Latest Latest Go to latest Published: Oct 18, 2022 License: LGPL-3.0 Imports: 28 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/bengarrett/retrotxtgo

Links

Open Source Insights

Documentation ¶

Overview ¶

Package convert extends Go's x/text/encoding capability to convert legacy encoded text to a modern UTF-8 encoding.

Example (Swap) ¶

fmt.Print(string(swap(DEL)))
fmt.Print(string(swap(SquareRoot)))

Output:

Δ✓

Index ¶

Constants
Variables
func AliasFmt(alias, value string, e encoding.Encoding) (string, error)
func AsaX34(e encoding.Encoding) string
func BOM() []byte
func D437(s string) ([]byte, error)
func DString(s string, c *charmap.Charmap) ([]byte, error)
func E437(s string) ([]byte, error)
func EString(s string, c *charmap.Charmap) ([]byte, error)
func Encoder(name string) (encoding.Encoding, error)
func Encodings() []encoding.Encoding
func HexDecode(s string) ([]byte, error)
func HexEncode(s string) []byte
func Humanize(name string) string
func ISO11Name(name string) bool
func List() *bytes.Buffer
func MakeBytes() []byte
func Mark(b []byte) []byte
func Numeric(name string) int
func Table(name string) (*bytes.Buffer, error)
func TrimEOF(b []byte) []byte
func Uniform(mime string) string
type Cell
- func Cells(e encoding.Encoding) (Cell, error)
type Convert
- func (c *Convert) ANSI(b ...byte) ([]rune, error)
- func (c *Convert) ANSIControls() *Convert
- func (c *Convert) Chars(b ...byte) ([]rune, error)
- func (c *Convert) Dump(b ...byte) ([]rune, error)
- func (c *Convert) LineBreaks()
- func (c *Convert) RunesControls()
- func (c *Convert) RunesControlsEBCDIC()
- func (c *Convert) RunesDOS()
- func (c *Convert) RunesEBCDIC()
- func (c *Convert) RunesKOI8()
- func (c *Convert) RunesLatin()
- func (c *Convert) RunesMacintosh()
- func (c *Convert) RunesShiftJIS()
- func (c *Convert) RunesUTF8()
- func (c *Convert) RunesWindows()
- func (c *Convert) Swap() *Convert
- func (c *Convert) Text(b ...byte) ([]rune, error)
- func (c *Convert) Transform() error
type Encoding
- func (e Encoding) String() string
type Flag
type In

Constants ¶

View Source

const (
	DosSUB    = 8594
	SymbolSUB = 9242
)

View Source

const (
	// NUL Null control code.
	NUL = iota
	// SOH Start of heading.
	SOH
	// STX Start of text.
	STX
	// ETX End of text.
	ETX
	// EOT End of transmission.
	EOT
	// ENQ Enquiry.
	ENQ
	// ACK Acknowledge.
	ACK
	// BEL Bell or alert.
	BEL
	// BS Backspace.
	BS
	// HT Horizontal tabulation.
	HT
	// LF Line feed.
	LF
	// VT Vertical tabulation.
	VT
	// FF Form feed.
	FF
	// CR Carriage return.
	CR
	// SO Shift out.
	SO
	// SI Shift in.
	SI
	// DLE Data Link Escape.
	DLE
	// DC1 Device control one.
	DC1
	// DC2 Device control two.
	DC2
	// DC3 Device control three.
	DC3
	// DC4 Device control four.
	DC4
	// NAK Negative acknowledge.
	NAK
	// SYN Synchronous idle.
	SYN
	// ETB End of transmission block.
	ETB
	// CAN Cancel.
	CAN
	// EM End of medium.
	EM
	// SUB Substitute.
	SUB
	// ESC Escape.
	ESC
	// FS File separator.
	FS
	// GS Group separator.
	GS
	// RS Record separator.
	RS
	// US Unit separator.
	US
	// SP Space.
	SP
)

View Source

const (
	// LeftSquareBracket [.
	LeftSquareBracket = 91
	// VerticalBar |.
	VerticalBar = 124
	// DEL Delete.
	DEL = 127
	// Dash Hyphen -.
	Dash = 150
	// Nbsp Non-breaking space.
	Nbsp = 160
	// InvertedExclamation ¡.
	InvertedExclamation = 161
	// Cent ¢.
	Cent = 162
	// BrokenBar ¦.
	BrokenBar = 166
	// Negation ¬.
	Negation = 172
	// PlusMinus ±.
	PlusMinus = 177
	// LightVertical light vertical │.
	LightVertical = 179
	// SquareRoot Square root √.
	SquareRoot = 251
	// NBSP Non-breaking space.
	NBSP = 255
	// Delta Δ.
	Delta = 916
	// LeftwardsArrow ←.
	LeftwardsArrow = 8592
	// SquareRootU Unicode square root √.
	SquareRootU = 8730
	// House ⌂.
	House = 8962
	// IntegralExtension ⎮.
	IntegralExtension = 9134
	// SymbolNUL ␀.
	SymbolNUL = 9216
	// SymbolESC ␛.
	SymbolESC = 9243
	// SymbolDEL ␡.
	SymbolDEL = 9249
	// LightVerticalU Box drawing light vertical │.
	LightVerticalU = 9474
	// CheckMark ✓.
	CheckMark = 10003
	// Replacement Replacement character �.
	Replacement = 65533
	// OpenBox Open box ␣.
	OpenBox = 9251
)

Variables ¶

View Source

var (
	// AsaX34_1963 ASA X3.4 1963.
	AsaX34_1963 encoding.Encoding = &x34_1963 // nolint: gochecknoglobals

	// AsaX34_1965 ASA X3.4 1965.
	AsaX34_1965 encoding.Encoding = &x34_1965 // nolint: gochecknoglobals

	// AnsiX34_1967 ANSI X3.4 1967/77/86.
	AnsiX34_1967 encoding.Encoding = &x34_1967 // nolint: gochecknoglobals

)

View Source

var (
	ErrChainANSI = errors.New("ansi() is a chain method that is to be used" +
		" in conjunction with swap: c.swap().ansi()")
	ErrChainWrap = errors.New("wrapWidth() is a chain method that is to be" +
		" used in conjunction with swap: c.swap().wrapWidth()")
	ErrBytes    = errors.New("cannot transform an empty byte slice")
	ErrEncoding = errors.New("no encoding provided")
	ErrName     = errors.New("encoding cannot match name or alias")
	ErrUTF8     = errors.New("string cannot encode to utf-8")
	ErrUTF16    = errors.New("utf-16 table encodings are not supported")
	ErrUTF32    = errors.New("utf-32 table encodings are not supported")
	ErrWidth    = errors.New("cannot determine the number columns from using line break")
)

View Source

var (
	ErrNilEncoding = errors.New("character encoding cannot be a nil value")
)

View Source

var (
	ErrNoName = errors.New("there is no encoding name")
)

Functions ¶

func AliasFmt ¶ added in v0.0.38

func AliasFmt(alias, value string, e encoding.Encoding) (string, error)

AliasFmt returns character encoding aliases.

func AsaX34 ¶ added in v0.0.38

func AsaX34(e encoding.Encoding) string

AsaX34 returns a named value for the legacy ASA ASCII character encodings.

func BOM ¶

func BOM() []byte

BOM is the UTF-8 byte order mark prefix.

Example ¶

fmt.Printf("%X", BOM())

Output:

EFBBBF

func D437 ¶

func D437(s string) ([]byte, error)

D437 decodes IBM Code Page 437 encoded text.

Example ¶

const name = base + "cp437In.txt"
result, err := D437(cp437hex)
if err != nil {
	log.Fatal(err)
}
_, err = filesystem.SaveTemp(name, result...)
if err != nil {
	log.Fatal(err)
}
t, err := filesystem.ReadText(name)
if err != nil {
	log.Fatal(err)
}
fmt.Print(t)

Output:

═╣▓╠═

func DString ¶

func DString(s string, c *charmap.Charmap) ([]byte, error)

DString decodes simple character encoding text.

func E437 ¶

func E437(s string) ([]byte, error)

E437 encodes text into IBM Code Page 437.

Example ¶

const name = base + "cp437.txt"
result, err := E437(utf)
if err != nil {
	log.Fatal(err)
}
_, err = filesystem.SaveTemp(name, result...)
if err != nil {
	log.Fatal(err)
}
t, err := filesystem.ReadText(name)
if err != nil {
	log.Fatal(err)
}
filesystem.Clean(name)
fmt.Print(len(t))

Output:

8

func EString ¶

func EString(s string, c *charmap.Charmap) ([]byte, error)

EString encodes text into a simple character encoding.

func Encoder ¶ added in v0.0.33

func Encoder(name string) (encoding.Encoding, error)

Encoder returns the named character set encoder.

func Encodings ¶

func Encodings() []encoding.Encoding

Encodings returns all the supported legacy text encodings.

func HexDecode ¶

func HexDecode(s string) ([]byte, error)

HexDecode decodes a hexadecimal string into bytes.

func HexEncode ¶

func HexEncode(s string) []byte

HexEncode encodes a string into hexadecimal bytes.

func Humanize ¶ added in v0.0.31

func Humanize(name string) string

Humanize the encoding by using a shorter, less formal name.

func ISO11Name ¶ added in v0.0.38

func ISO11Name(name string) bool

func List ¶

func List() *bytes.Buffer

List returns a tabled list of supported IANA character set encodings.

func MakeBytes ¶

func MakeBytes() []byte

MakeBytes generates a 256 character or 8-bit container ready to hold legacy code point values.

func Mark ¶

func Mark(b []byte) []byte

Mark adds a UTF-8 byte order mark to the text if it doesn't already exist.

func Numeric ¶ added in v0.0.38

func Numeric(name string) int

Numeric returns a numeric alias for a character encoding. A -1 int is returned whenever an alias could not be generated. Unicode based encodings always return -1.

func Table ¶

func Table(name string) (*bytes.Buffer, error)

Table prints out all the characters in the named 8-bit character set.

func TrimEOF ¶ added in v0.0.33

func TrimEOF(b []byte) []byte

TrimEOF will cut text at the first occurrence of the SUB character. The SUB is used by DOS and CP/M as an end-of-file marker.

func Uniform ¶ added in v0.0.38

func Uniform(mime string) string

Uniform formats MIME values.

Types ¶

type Cell ¶ added in v0.0.38

type Cell struct {
	Name    string
	Value   string
	Numeric string
	Alias   string
}

func Cells ¶ added in v0.0.38

func Cells(e encoding.Encoding) (Cell, error)

Cells returns character encoding details for use in a text table.

type Convert ¶ added in v0.0.31

type Convert struct {
	Flags  Flag   // Commandline supplied flag values.
	Input  In     // Input text for transformation.
	Output []rune // Transformed UTF-8 runes.
	// contains filtered or unexported fields
}

Convert 8-bit legacy or other Unicode text to UTF-8.

func (*Convert) ANSI ¶ added in v0.0.31

func (c *Convert) ANSI(b ...byte) ([]rune, error)

ANSI transforms legacy encoded ANSI into modern UTF-8 text. It displays ASCII control codes as characters. It obeys the DOS end of file marker.

func (*Convert) ANSIControls ¶ added in v0.0.31

func (c *Convert) ANSIControls() *Convert

ANSIControls replaces all ←[ and ␛[ character matches with functional ANSI escape controls.

func (*Convert) Chars ¶ added in v0.0.31

func (c *Convert) Chars(b ...byte) ([]rune, error)

Chars transforms legacy encoded characters and text control codes into UTF-8 characters. It displays both ASCII and ANSI control codes as characters. It ignores the DOS end of file marker.

func (*Convert) Dump ¶ added in v0.0.31

func (c *Convert) Dump(b ...byte) ([]rune, error)

Dump transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It ignores the DOS end of file marker.

func (*Convert) LineBreaks ¶ added in v0.0.31

func (c *Convert) LineBreaks()

LineBreaks will try to guess the line break representation as a 2 byte value. A guess of Unix will return [10, 0], Windows [13, 10], otherwise a [0, 0] value is returned.

func (*Convert) RunesControls ¶ added in v0.0.31

func (c *Convert) RunesControls()

RunesControls switches out C0 and C1 ASCII controls with Unicode Control Picture representations.

func (*Convert) RunesControlsEBCDIC ¶ added in v0.0.31

func (c *Convert) RunesControlsEBCDIC()

RunesControlsEBCDIC switches out EBCDIC controls with Unicode Control Picture representations.

func (*Convert) RunesDOS ¶ added in v0.0.31

func (c *Convert) RunesDOS()

RunesDOS switches out C0, C1 and other controls with PC/MS-DOS picture glyphs.

func (*Convert) RunesEBCDIC ¶ added in v0.0.31

func (c *Convert) RunesEBCDIC()

RunesEBCDIC switches out EBCDIC IBM mainframe controls with Unicode picture representations. Where no appropriate picture exists a space placeholder is used.

func (*Convert) RunesKOI8 ¶ added in v0.0.31

func (c *Convert) RunesKOI8()

RunesKOI8 blanks out unused C0, C1 and other controls spaces for Russian sets.

func (*Convert) RunesLatin ¶ added in v0.0.31

func (c *Convert) RunesLatin()

RunesLatin blanks out unused C0, C1 and other controls spaces for ISO Latin sets.

func (*Convert) RunesMacintosh ¶ added in v0.0.31

func (c *Convert) RunesMacintosh()

RunesMacintosh replaces specific Mac OS Roman characters with Unicode picture representations.

func (*Convert) RunesShiftJIS ¶ added in v0.0.31

func (c *Convert) RunesShiftJIS()

RunesShiftJIS tweaks some Unicode picture representations for Shift-JIS.

func (*Convert) RunesUTF8 ¶ added in v0.0.31

func (c *Convert) RunesUTF8()

RunesUTF8 tweaks some Unicode picture representations for UTF-8 Basic Latin.

func (*Convert) RunesWindows ¶ added in v0.0.31

func (c *Convert) RunesWindows()

RunesWindows tweaks some Unicode picture representations for Windows-125x sets.

func (*Convert) Swap ¶ added in v0.0.31

func (c *Convert) Swap() *Convert

Swap transforms character map and control codes into UTF-8 Unicode runes.

func (*Convert) Text ¶ added in v0.0.31

func (c *Convert) Text(b ...byte) ([]rune, error)

Text transforms legacy encoded text or ANSI into modern UTF-8 text. It obeys common ASCII control codes. It obeys the DOS end of file marker.

func (*Convert) Transform ¶ added in v0.0.31

func (c *Convert) Transform() error

Transform byte data from named character map encoded text into UTF-8.

type Encoding ¶

type Encoding struct {
	encoding.Encoding
	Name string
}

Encoding is an implementation of the Encoding interface that adds the String and ID methods to an existing encoding.

func (Encoding) String ¶ added in v0.0.33

func (e Encoding) String() string

type Flag ¶ added in v0.0.33

type Flag struct {
	Controls  []string // Always use these control codes.
	SwapChars []string // Swap out these characters with UTF-8 alternatives.
	MaxWidth  int      // Maximum text width per-line.
}

Flag holds the user-supplied values.

type In ¶ added in v0.0.33

type In struct {
	Encoding encoding.Encoding // Bytes text encoding.
	Bytes    []byte            // Input text as bytes.
	// contains filtered or unexported fields
}

In is the text input for conversion.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL