segmenter

package
v0.2.8 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 19, 2026 License: MIT Imports: 10 Imported by: 0

Documentation

Overview

Package segmenter implements the ECMA-402 Intl.Segmenter constructor.

// Errors are discarded here for brevity; the runnable examples check them.
locales, _ := locale.ParseList("en-US")
seg, _ := segmenter.New(locales, segmenter.Options{Granularity: segmenter.WordGranularity})
segments := seg.Segment("Hello, world!")
_ = segments

See README.md for usage examples and SPECS/46-segmenter.md for the contract.

Example

Example demonstrates Intl.Segmenter.prototype.segment from ECMA-402.

package main

import (
	"fmt"

	"github.com/agentable/go-intl/locale"
	"github.com/agentable/go-intl/segmenter"
)

// main segments "Hello, world!" at word granularity for the "en" locale and
// prints each word-like segment on its own line.
func main() {
	opts := segmenter.Options{Granularity: segmenter.WordGranularity}
	words, err := segmenter.New(mustLocaleList("en"), opts)
	if err != nil {
		panic(err)
	}

	for segment := range words.Segment("Hello, world!").All() {
		if !segment.IsWordLike {
			// Skip segments that are not flagged as word-like.
			continue
		}
		fmt.Println(segment.Segment)
	}
}

// mustLocaleList parses tags with locale.ParseList and returns the resulting
// list, panicking if parsing reports an error.
func mustLocaleList(tags ...string) locale.List {
	list, err := locale.ParseList(tags...)
	if err != nil {
		panic(err)
	}
	return list
}
Output:
Hello
world
Example (Options)

Example_options demonstrates Intl.Segmenter constructor options from ECMA-402.

package main

import (
	"fmt"

	"github.com/agentable/go-intl/locale"
	"github.com/agentable/go-intl/segmenter"
)

// main segments "Hello. Goodbye." at sentence granularity for the "en" locale
// and prints every segment as a quoted string, one per line.
func main() {
	opts := segmenter.Options{Granularity: segmenter.SentenceGranularity}
	sentences, err := segmenter.New(mustLocaleList("en"), opts)
	if err != nil {
		panic(err)
	}

	parts := sentences.Segment("Hello. Goodbye.")
	for segment := range parts.All() {
		fmt.Printf("%q\n", segment.Segment)
	}
}

// mustLocaleList parses tags with locale.ParseList and returns the resulting
// list, panicking if parsing reports an error.
func mustLocaleList(tags ...string) locale.List {
	list, err := locale.ParseList(tags...)
	if err != nil {
		panic(err)
	}
	return list
}
Output:
"Hello. "
"Goodbye."

Index

Examples

Constants

View Source
const (
	// Locale-matcher algorithm names, usable as Options.LocaleMatcher.
	LookupLocaleMatcher  LocaleMatcher = "lookup"
	BestFitLocaleMatcher LocaleMatcher = "best fit"

	// Segmentation units, usable as Options.Granularity.
	GraphemeGranularity Granularity = "grapheme"
	WordGranularity     Granularity = "word"
	SentenceGranularity Granularity = "sentence"
)

Variables

This section is empty.

Functions

func SupportedLocalesOf

func SupportedLocalesOf(locales locale.List, opts Options) (locale.List, error)

Types

type Granularity

// Granularity names a segmentation unit; the package defines
// GraphemeGranularity, WordGranularity, and SentenceGranularity.
type Granularity string

type LocaleMatcher

// LocaleMatcher names a locale matching algorithm; the package defines
// LookupLocaleMatcher and BestFitLocaleMatcher.
type LocaleMatcher string

type Options

type Options struct {
	LocaleMatcher LocaleMatcher // locale matching algorithm, e.g. LookupLocaleMatcher or BestFitLocaleMatcher
	Granularity   Granularity   // segmentation unit, e.g. GraphemeGranularity, WordGranularity, or SentenceGranularity
}

Options mirrors the JS Intl.Segmenter options bag.

type ResolvedOptions

// ResolvedOptions is the value returned by (*Segmenter).ResolvedOptions; it
// carries the locale and granularity resolved for a Segmenter.
type ResolvedOptions struct {
	// Locale is the resolved locale. Mirrors Intl.Segmenter resolved option "locale".
	Locale locale.Locale `json:"locale"`
	// Granularity is the resolved segmentation granularity. Mirrors Intl.Segmenter resolved option "granularity".
	Granularity Granularity `json:"granularity"`
}

type Segment

type Segment struct {
	Segment       string `json:"segment"`    // text of this segment
	CodeUnitIndex int    `json:"index"`      // UTF-16 code-unit offset, presumably of the segment start within Input (confirm)
	ByteIndex     int    `json:"-"`          // presumably the UTF-8 byte offset counterpart of CodeUnitIndex (confirm); excluded from JSON
	Input         string `json:"input"`      // presumably the full string that was passed to Segmenter.Segment (confirm)
	IsWordLike    bool   `json:"isWordLike"` // set on word-like segments; the package example uses it to skip punctuation
}

Segment represents one boundary-delimited chunk of a string.

type Segmenter

type Segmenter struct {
	// contains filtered or unexported fields
}

Segmenter splits strings into graphemes, words, or sentences.

func New

func New(locales locale.List, opts Options) (*Segmenter, error)

New constructs a Segmenter for the requested locale and options.

func (*Segmenter) ResolvedOptions

func (f *Segmenter) ResolvedOptions() ResolvedOptions

func (*Segmenter) Segment

func (f *Segmenter) Segment(input string) *Segments

Segment returns a Segments view over input using the resolved granularity.

type Segments

type Segments struct {
	// contains filtered or unexported fields
}

Segments holds the segmentation view of a specific input string.

func (*Segments) All

func (s *Segments) All() iter.Seq[Segment]

All iterates over every segment in order.

func (*Segments) Containing

func (s *Segments) Containing(index int) (Segment, bool)

Containing returns the segment that contains the UTF-16 code-unit offset index. The bool is false when index is out of range, mirroring the undefined that JS `Segments.prototype.containing` returns in that case.

Example

ExampleSegments_Containing demonstrates Intl.Segments.prototype.containing from ECMA-402.

package main

import (
	"fmt"

	"github.com/agentable/go-intl/locale"
	"github.com/agentable/go-intl/segmenter"
)

// main segments "Hello, world!" at word granularity for the "en" locale, looks
// up the segment containing UTF-16 code-unit index 8, and prints its text
// together with the found flag.
func main() {
	opts := segmenter.Options{Granularity: segmenter.WordGranularity}
	words, err := segmenter.New(mustLocaleList("en"), opts)
	if err != nil {
		panic(err)
	}

	segments := words.Segment("Hello, world!")
	hit, found := segments.Containing(8)
	fmt.Println(hit.Segment, found)
}

// mustLocaleList parses tags with locale.ParseList and returns the resulting
// list, panicking if parsing reports an error.
func mustLocaleList(tags ...string) locale.List {
	list, err := locale.ParseList(tags...)
	if err != nil {
		panic(err)
	}
	return list
}
Output:
world true

func (*Segments) ContainingByte

func (s *Segments) ContainingByte(index int) (Segment, bool)

ContainingByte returns the segment that contains UTF-8 byte offset index.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL