unidata

package
v2.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 5, 2021 License: MIT Imports: 7 Imported by: 0

Documentation

Overview

Package unidata contains information about Unicode characters.

Index

Constants

View Source
// East Asian Width property values. The trailing comment on each constant
// gives the property value name followed by its abbreviation as used in the
// Unicode East Asian Width data (UAX #11).
const (
	WidthAmbiguous = uint8(iota) // Ambiguous, A
	WidthFullWidth               // Fullwidth, F
	WidthHalfWidth               // Halfwidth, H
	WidthNarrow                  // Narrow, Na
	WidthNeutral                 // Neutral (Not East Asian), N
	WidthWide                    // Wide, W
)
View Source
// General_Category values. CatUnknown is the zero value; every other constant
// carries its Unicode abbreviation in the trailing comment. Entries whose
// comment is a list joined with "|" (LC, L, M, N, P, S, Z, C) are groupings of
// the listed values rather than a category of their own.
const (
	CatUnknown              = uint8(iota)
	CatUppercaseLetter      // Lu – an uppercase letter
	CatLowercaseLetter      // Ll – a lowercase letter
	CatTitlecaseLetter      // Lt – a digraphic character, with first part uppercase
	CatCasedLetter          // LC – Lu | Ll | Lt
	CatModifierLetter       // Lm – a modifier letter
	CatOtherLetter          // Lo – other letters, including syllables and ideographs
	CatLetter               // L  – Lu | Ll | Lt | Lm | Lo
	CatNonspacingMark       // Mn – a nonspacing combining mark (zero advance width)
	CatSpacingMark          // Mc – a spacing combining mark (positive advance width)
	CatEnclosingMark        // Me – an enclosing combining mark
	CatMark                 // M  – Mn | Mc | Me
	CatDecimalNumber        // Nd – a decimal digit
	CatLetterNumber         // Nl – a letterlike numeric character
	CatOtherNumber          // No – a numeric character of other type
	CatNumber               // N  – Nd | Nl | No
	CatConnectorPunctuation // Pc – a connecting punctuation mark, like a tie
	CatDashPunctuation      // Pd – a dash or hyphen punctuation mark
	CatOpenPunctuation      // Ps – an opening punctuation mark (of a pair)
	CatClosePunctuation     // Pe – a closing punctuation mark (of a pair)
	CatInitialPunctuation   // Pi – an initial quotation mark
	CatFinalPunctuation     // Pf – a final quotation mark
	CatOtherPunctuation     // Po – a punctuation mark of other type
	CatPunctuation          // P  – Pc | Pd | Ps | Pe | Pi | Pf | Po
	CatMathSymbol           // Sm – a symbol of mathematical use
	CatCurrencySymbol       // Sc – a currency sign
	CatModifierSymbol       // Sk – a non-letterlike modifier symbol
	CatOtherSymbol          // So – a symbol of other type
	CatSymbol               // S  – Sm | Sc | Sk | So
	CatSpaceSeparator       // Zs – a space character (of various non-zero widths)
	CatLineSeparator        // Zl – U+2028 LINE SEPARATOR only
	CatParagraphSeparator   // Zp – U+2029 PARAGRAPH SEPARATOR only
	CatSeparator            // Z  – Zs | Zl | Zp
	CatControl              // Cc – a C0 or C1 control code
	CatFormat               // Cf – a format control character
	CatSurrogate            // Cs – a surrogate code point
	CatPrivateUse           // Co – a private-use character
	CatUnassigned           // Cn – a reserved unassigned code point or a noncharacter
	CatOther                // C  – Cc | Cf | Cs | Co | Cn
)

See http://www.unicode.org/reports/tr44/#General_Category_Values for the definition of these General_Category values.

View Source
// Gender values (untyped integer constants).
//
// NOTE(review): these look like the values stored in Emoji.Genders, but
// nothing visible here shows where they are used or what distinguishes
// "sign" from "role" — confirm against the code that reads that field.
const (
	GenderNone = 0
	GenderSign = 1
	GenderRole = 2
)
View Source
// UnknownCodepoint is a placeholder string; presumably it is used as the name
// of a codepoint that is not present in the Unicode data — TODO confirm
// against the lookup code.
const UnknownCodepoint = "CODEPOINT NOT IN UNICODE"

Variables

View Source
var (
	// Blocks maps a name to a pair of runes; presumably the name is a Unicode
	// block name and the pair is the block's first and last codepoint
	// (inclusive) — TODO confirm against the elided data.
	Blocks = map[string][2]rune{}/* 300 elements not displayed */

	// Blockmap is created empty here.
	// NOTE(review): nothing in view shows where it is filled or what its keys
	// and values represent — document once confirmed.
	Blockmap = make(map[string]string)
)

TODO: generate this from the data file: https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt

View Source
var (
	// Catmap maps a string to one of the Cat* constants; presumably the keys
	// are the accepted spellings of category names and abbreviations — TODO
	// confirm against the elided data.
	Catmap = map[string]uint8{}/* 141 elements not displayed */

	// Catnames maps each Cat* constant to its long name, written with
	// underscores between words. There is no entry for CatUnknown, so looking
	// it up yields the empty string.
	Catnames = map[uint8]string{
		CatUppercaseLetter:      "Uppercase_Letter",
		CatLowercaseLetter:      "Lowercase_Letter",
		CatTitlecaseLetter:      "Titlecase_Letter",
		CatCasedLetter:          "Cased_Letter",
		CatModifierLetter:       "Modifier_Letter",
		CatOtherLetter:          "Other_Letter",
		CatLetter:               "Letter",
		CatNonspacingMark:       "Nonspacing_Mark",
		CatSpacingMark:          "Spacing_Mark",
		CatEnclosingMark:        "Enclosing_Mark",
		CatMark:                 "Mark",
		CatDecimalNumber:        "Decimal_Number",
		CatLetterNumber:         "Letter_Number",
		CatOtherNumber:          "Other_Number",
		CatNumber:               "Number",
		CatConnectorPunctuation: "Connector_Punctuation",
		CatDashPunctuation:      "Dash_Punctuation",
		CatOpenPunctuation:      "Open_Punctuation",
		CatClosePunctuation:     "Close_Punctuation",
		CatInitialPunctuation:   "Initial_Punctuation",
		CatFinalPunctuation:     "Final_Punctuation",
		CatOtherPunctuation:     "Other_Punctuation",
		CatPunctuation:          "Punctuation",
		CatMathSymbol:           "Math_Symbol",
		CatCurrencySymbol:       "Currency_Symbol",
		CatModifierSymbol:       "Modifier_Symbol",
		CatOtherSymbol:          "Other_Symbol",
		CatSymbol:               "Symbol",
		CatSpaceSeparator:       "Space_Separator",
		CatLineSeparator:        "Line_Separator",
		CatParagraphSeparator:   "Paragraph_Separator",
		CatSeparator:            "Separator",
		CatControl:              "Control",
		CatFormat:               "Format",
		CatSurrogate:            "Surrogate",
		CatPrivateUse:           "Private_Use",
		CatUnassigned:           "Unassigned",
		CatOther:                "Other",
	}
)
View Source
// Codepoints maps a rune to its Codepoint record.
var Codepoints = map[rune]Codepoint{}/* 34626 elements not displayed */
View Source
// EmojiGroups lists the emoji group names; each name is also a key of
// EmojiSubgroups.
//
// NOTE(review): Emoji.Group is an int, presumably an index into this slice —
// confirm against Emoji.GroupName.
var EmojiGroups = []string{
	"Smileys & Emotion",
	"People & Body",
	"Component",
	"Animals & Nature",
	"Food & Drink",
	"Travel & Places",
	"Activities",
	"Objects",
	"Symbols",
	"Flags",
}
View Source
// EmojiSubgroups maps every emoji group name to the names of its subgroups,
// in the order they are listed below.
//
// The inner slice literals leave out the element type, which the map's value
// type already implies.
var EmojiSubgroups = map[string][]string{
	"Smileys & Emotion": {
		"face-smiling",
		"face-affection",
		"face-tongue",
		"face-hand",
		"face-neutral-skeptical",
		"face-sleepy",
		"face-unwell",
		"face-hat",
		"face-glasses",
		"face-concerned",
		"face-negative",
		"face-costume",
		"cat-face",
		"monkey-face",
		"emotion",
	},
	"People & Body": {
		"hand-fingers-open",
		"hand-fingers-partial",
		"hand-single-finger",
		"hand-fingers-closed",
		"hands",
		"hand-prop",
		"body-parts",
		"person",
		"person-gesture",
		"person-role",
		"person-fantasy",
		"person-activity",
		"person-sport",
		"person-resting",
		"family",
		"person-symbol",
	},
	"Component": {
		"skin-tone",
		"hair-style",
	},
	"Animals & Nature": {
		"animal-mammal",
		"animal-bird",
		"animal-amphibian",
		"animal-reptile",
		"animal-marine",
		"animal-bug",
		"plant-flower",
		"plant-other",
	},
	"Food & Drink": {
		"food-fruit",
		"food-vegetable",
		"food-prepared",
		"food-asian",
		"food-marine",
		"food-sweet",
		"drink",
		"dishware",
	},
	"Travel & Places": {
		"place-map",
		"place-geographic",
		"place-building",
		"place-religious",
		"place-other",
		"transport-ground",
		"transport-water",
		"transport-air",
		"hotel",
		"time",
		"sky & weather",
	},
	"Activities": {
		"event",
		"award-medal",
		"sport",
		"game",
		"arts & crafts",
	},
	"Objects": {
		"clothing",
		"sound",
		"music",
		"musical-instrument",
		"phone",
		"computer",
		"light & video",
		"book-paper",
		"money",
		"mail",
		"writing",
		"office",
		"lock",
		"tool",
		"science",
		"medical",
		"household",
		"other-object",
	},
	"Symbols": {
		"transport-sign",
		"warning",
		"arrow",
		"religion",
		"zodiac",
		"av-symbol",
		"gender",
		"math",
		"punctuation",
		"currency",
		"other-symbol",
		"keycap",
		"alphanum",
		"geometric",
	},
	"Flags": {
		"flag",
		"country-flag",
		"subdivision-flag",
	},
}
View Source
// Emojis is the list of Emoji records.
var Emojis = []Emoji{}/* 1709 elements not displayed */
View Source
// Planes maps a plane name to the first and last codepoint it covers. The
// ranges are inclusive and together span 0 through 0x10FFFF without gaps.
//
// Two entries cover more than one 0x10000-sized plane: "Unassigned"
// (0x40000–0xDFFFF) and "Supplementary Private Use Area planes"
// (0xF0000–0x10FFFF).
var Planes = map[string][2]rune{
	"Basic Multilingual Plane":              {0, 0xFFFF},
	"Supplementary Multilingual Plane":      {0x10000, 0x1FFFF},
	"Supplementary Ideographic Plane":       {0x20000, 0x2FFFF},
	"Tertiary Ideographic Plane":            {0x30000, 0x3FFFF},
	"Unassigned":                            {0x40000, 0xDFFFF},
	"Supplementary Special-purpose Plane":   {0xE0000, 0xEFFFF},
	"Supplementary Private Use Area planes": {0xF0000, 0x10FFFF},
}
View Source
// WidthNames maps each Width* constant to a short lowercase name.
var WidthNames = map[uint8]string{
	WidthAmbiguous: "ambiguous",
	WidthFullWidth: "full",
	WidthHalfWidth: "half",
	WidthNarrow:    "narrow",
	WidthNeutral:   "neutral",
	WidthWide:      "wide",
}

Functions

func CanonicalCategory

func CanonicalCategory(cat string) string

CanonicalCategory transforms a category name to the canonical representation.

func ToRune

func ToRune(s string) (rune, error)

ToRune converts a human input string to a rune.

The input can be given in any of these forms: U+41, U+0041, U41, 0x41, 0o101, or 0b1000001.

Types

type Codepoint

// Codepoint holds the data recorded for a single codepoint.
type Codepoint struct {
	// Codepoint is the rune this record describes.
	Codepoint rune
	// Width is presumably one of the Width* constants — TODO confirm.
	Width     uint8
	// Cat is presumably one of the Cat* constants — TODO confirm.
	Cat       uint8
	// Name is the codepoint's name.
	Name      string
	// NOTE(review): Digraph, HTML and KeySym look like alternative ways to
	// refer to or enter the character (a digraph, an HTML entity name, an X11
	// keysym); their exact format is not visible here — confirm.
	Digraph   string
	HTML      string
	KeySym    string // TODO: []string?
}

Codepoint is a single codepoint.

func Find

func Find(cp rune) (Codepoint, bool)

Find looks up a codepoint by its rune value.

func (Codepoint) Block

func (c Codepoint) Block() string

func (Codepoint) Category

func (c Codepoint) Category() string

func (Codepoint) Format

func (c Codepoint) Format(base int) string

func (Codepoint) FormatCodepoint

func (c Codepoint) FormatCodepoint() string

func (Codepoint) HTMLEntity

func (c Codepoint) HTMLEntity() string

func (Codepoint) JSON

func (c Codepoint) JSON() string

func (Codepoint) Plane

func (c Codepoint) Plane() string

func (Codepoint) Repr

func (c Codepoint) Repr(raw bool) string

func (Codepoint) String

func (c Codepoint) String() string

func (Codepoint) UTF16

func (c Codepoint) UTF16(bigEndian bool) string

func (Codepoint) UTF8

func (c Codepoint) UTF8() string

func (Codepoint) WidthName

func (c Codepoint) WidthName() string

func (Codepoint) XMLEntity

func (c Codepoint) XMLEntity() string

type Emoji

// Emoji describes one emoji, which may consist of several codepoints.
type Emoji struct {
	// Codepoints are the runes that make up the emoji.
	Codepoints      []rune
	// Name is the emoji's name.
	Name            string
	// NOTE(review): Group and Subgroup are ints, presumably indices into
	// EmojiGroups and the matching EmojiSubgroups entry — confirm against
	// GroupName and SubgroupName.
	Group, Subgroup int
	// CLDR is presumably the list of CLDR annotation keywords — TODO confirm.
	CLDR            []string
	// SkinTones presumably reports whether skin tone modifiers can be
	// applied — TODO confirm.
	SkinTones       bool
	// Genders is presumably one of GenderNone, GenderSign or GenderRole —
	// TODO confirm.
	Genders         int
}

Emoji is an emoji sequence.

func (Emoji) GroupName

func (e Emoji) GroupName() string

func (Emoji) String

func (e Emoji) String() string

func (Emoji) SubgroupName

func (e Emoji) SubgroupName() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL