Documentation
¶
Overview ¶
Package segmenter implements the ECMA-402 Intl.Segmenter constructor.
// Parse the requested locale tags; the error is discarded in this snippet.
locales, _ := locale.ParseList("en-US")
// Construct a word-granularity segmenter for those locales; the error is likewise discarded.
seg, _ := segmenter.New(locales, segmenter.Options{Granularity: segmenter.WordGranularity})
// Obtain the segmentation view of the input string.
segments := seg.Segment("Hello, world!")
// Blank assignment marks segments as used.
_ = segments
See README.md for usage examples and SPECS/46-segmenter.md for the contract.
Example ¶
Example demonstrates Intl.Segmenter.prototype.segment from ECMA-402.
package main
import (
"fmt"
"github.com/agentable/go-intl/locale"
"github.com/agentable/go-intl/segmenter"
)
// main builds an English word segmenter and prints every word-like segment
// of "Hello, world!" on its own line.
func main() {
	opts := segmenter.Options{Granularity: segmenter.WordGranularity}
	words, err := segmenter.New(mustLocaleList("en"), opts)
	if err != nil {
		panic(err)
	}
	for seg := range words.Segment("Hello, world!").All() {
		// Skip punctuation and whitespace segments.
		if !seg.IsWordLike {
			continue
		}
		fmt.Println(seg.Segment)
	}
}
// mustLocaleList parses tags into a locale.List and panics if parsing fails.
func mustLocaleList(tags ...string) locale.List {
	list, parseErr := locale.ParseList(tags...)
	if parseErr != nil {
		panic(parseErr)
	}
	return list
}
Output:
Hello
world
Example (Options) ¶
Example_options demonstrates Intl.Segmenter constructor options from ECMA-402.
package main
import (
"fmt"
"github.com/agentable/go-intl/locale"
"github.com/agentable/go-intl/segmenter"
)
// main builds an English sentence segmenter and prints each sentence of
// "Hello. Goodbye." as a quoted string, one per line.
func main() {
	opts := segmenter.Options{Granularity: segmenter.SentenceGranularity}
	sentences, err := segmenter.New(mustLocaleList("en"), opts)
	if err != nil {
		panic(err)
	}
	input := "Hello. Goodbye."
	for seg := range sentences.Segment(input).All() {
		fmt.Printf("%q\n", seg.Segment)
	}
}
// mustLocaleList parses tags into a locale.List and panics if parsing fails.
func mustLocaleList(tags ...string) locale.List {
	list, parseErr := locale.ParseList(tags...)
	if parseErr != nil {
		panic(parseErr)
	}
	return list
}
Output:
"Hello. "
"Goodbye."
Index ¶
Examples ¶
Constants ¶
const ( LookupLocaleMatcher LocaleMatcher = "lookup" BestFitLocaleMatcher LocaleMatcher = "best fit" GraphemeGranularity Granularity = "grapheme" WordGranularity Granularity = "word" SentenceGranularity Granularity = "sentence" )
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Granularity ¶
// Granularity is the string-valued segmentation unit; see GraphemeGranularity,
// WordGranularity, and SentenceGranularity.
type Granularity string
type LocaleMatcher ¶
// LocaleMatcher is the string-valued locale matcher selection; see
// LookupLocaleMatcher and BestFitLocaleMatcher.
type LocaleMatcher string
type Options ¶
type Options struct {
// LocaleMatcher is the requested locale matcher (LookupLocaleMatcher or BestFitLocaleMatcher).
LocaleMatcher LocaleMatcher
// Granularity is the requested segmentation unit (GraphemeGranularity, WordGranularity, or SentenceGranularity).
Granularity Granularity
}
Options mirrors the JS Intl.Segmenter options bag.
type ResolvedOptions ¶
// ResolvedOptions holds the resolved locale and granularity, as returned by (*Segmenter).ResolvedOptions.
type ResolvedOptions struct {
// Locale is the resolved locale. Mirrors Intl.Segmenter resolved option "locale".
Locale locale.Locale `json:"locale"`
// Granularity is the resolved segmentation granularity. Mirrors Intl.Segmenter resolved option "granularity".
Granularity Granularity `json:"granularity"`
}
type Segment ¶
type Segment struct {
// Segment is the text of this segment; serialized as "segment".
Segment string `json:"segment"`
// CodeUnitIndex is serialized as "index"; presumably the UTF-16 code-unit offset of the segment within Input — confirm against the implementation.
CodeUnitIndex int `json:"index"`
// ByteIndex is excluded from JSON output; presumably the byte offset of the segment within Input — confirm against the implementation.
ByteIndex int `json:"-"`
// Input is serialized as "input"; presumably the full string that was segmented — confirm against the implementation.
Input string `json:"input"`
// IsWordLike is serialized as "isWordLike"; the package example uses it to keep only word-like segments.
IsWordLike bool `json:"isWordLike"`
}
Segment represents one boundary-delimited chunk of a string.
type Segmenter ¶
type Segmenter struct {
// contains filtered or unexported fields
}
Segmenter splits strings into graphemes, words, or sentences.
func (*Segmenter) ResolvedOptions ¶
func (f *Segmenter) ResolvedOptions() ResolvedOptions
type Segments ¶
type Segments struct {
// contains filtered or unexported fields
}
Segments holds the segmentation view of a specific input string.
func (*Segments) Containing ¶
Containing returns the segment that contains the given UTF-16 code-unit index. The returned bool is false when the index is out of range, mirroring the `undefined` result of JS `Segments.prototype.containing`.
Example ¶
ExampleSegments_Containing demonstrates Segments.prototype.containing from ECMA-402.
package main
import (
"fmt"
"github.com/agentable/go-intl/locale"
"github.com/agentable/go-intl/segmenter"
)
// main looks up the word segment covering UTF-16 code-unit index 8 of
// "Hello, world!" and prints its text together with the found flag.
func main() {
	opts := segmenter.Options{Granularity: segmenter.WordGranularity}
	words, err := segmenter.New(mustLocaleList("en"), opts)
	if err != nil {
		panic(err)
	}
	view := words.Segment("Hello, world!")
	hit, found := view.Containing(8)
	fmt.Println(hit.Segment, found)
}
// mustLocaleList parses tags into a locale.List and panics if parsing fails.
func mustLocaleList(tags ...string) locale.List {
	list, parseErr := locale.ParseList(tags...)
	if parseErr != nil {
		panic(parseErr)
	}
	return list
}
Output: world true