ckb

package
Version: v0.0.0-...-a41f229 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 11, 2015 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

View Source
// Named Unicode code points (untyped rune constants) for Arabic-script
// characters. Each trailing comment gives the character's official Unicode
// name for the code point on that line.
// NOTE(review): presumably consumed by the Sorani normalizer and stemmer
// filters; their bodies are not visible here — confirm.
const (
	// Yeh-shaped letters.
	Yeh        = '\u064A' // ARABIC LETTER YEH
	DotlessYeh = '\u0649' // ARABIC LETTER ALEF MAKSURA
	FarsiYeh   = '\u06CC' // ARABIC LETTER FARSI YEH

	// Kaf-shaped letters.
	Kaf   = '\u0643' // ARABIC LETTER KAF
	Keheh = '\u06A9' // ARABIC LETTER KEHEH

	// Heh-shaped letters, plus the zero-width non-joiner.
	Heh            = '\u0647' // ARABIC LETTER HEH
	Ae             = '\u06D5' // ARABIC LETTER AE
	Zwnj           = '\u200C' // ZERO WIDTH NON-JOINER
	HehDoachashmee = '\u06BE' // ARABIC LETTER HEH DOACHASHMEE
	TehMarbuta     = '\u0629' // ARABIC LETTER TEH MARBUTA

	// Reh-shaped letters.
	Reh       = '\u0631' // ARABIC LETTER REH
	Rreh      = '\u0695' // ARABIC LETTER REH WITH SMALL V BELOW
	RrehAbove = '\u0692' // ARABIC LETTER REH WITH SMALL V

	// Tatweel (elongation stroke) and the short-vowel / diacritic marks.
	Tatweel  = '\u0640' // ARABIC TATWEEL
	Fathatan = '\u064B' // ARABIC FATHATAN
	Dammatan = '\u064C' // ARABIC DAMMATAN
	Kasratan = '\u064D' // ARABIC KASRATAN
	Fatha    = '\u064E' // ARABIC FATHA
	Damma    = '\u064F' // ARABIC DAMMA
	Kasra    = '\u0650' // ARABIC KASRA
	Shadda   = '\u0651' // ARABIC SHADDA
	Sukun    = '\u0652' // ARABIC SUKUN
)
View Source
// NormalizeName is the string identifier "normalize_ckb".
// NOTE(review): presumably the name under which the Sorani normalizer token
// filter is registered — the registration code is not visible here; confirm.
const NormalizeName = "normalize_ckb"
View Source
// StemmerName is the string identifier "stemmer_ckb".
// NOTE(review): presumably the name under which the Sorani stemmer token
// filter is registered — the registration code is not visible here; confirm.
const StemmerName = "stemmer_ckb"
View Source
// StopName is the string identifier "stop_ckb".
// NOTE(review): presumably the name used to register the Sorani stop-word
// token map and/or stop token filter — the registration code is not visible
// here; confirm which of the two (or both) use it.
const StopName = "stop_ckb"

Variables

View Source
// SoraniStopWords holds the Sorani Kurdish stop-word list as raw bytes, one
// entry per line. Lines beginning with '#' are part of the data itself: they
// carry English glosses, section headings, and the provenance of the list.
// Per the embedded header, the words are stored in already-normalized form
// (for example, "e" is written with U+06D5).
//
// One word appears twice: once under "demonstratives" (glossed "that") and
// once under "personal pronouns" (glossed "he/she/it").
//
// NOTE(review): presumably the loader ignores '#' lines and blank lines — the
// parsing code is not visible here; confirm before editing the literal.
var SoraniStopWords = []byte(`# set of kurdish stopwords
# note these have been normalized with our scheme (e represented with U+06D5, etc)
# constructed from:
# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc

# and
و
# which
کە
# of
ی
# made/did
کرد
# that/which
ئەوەی
# on/head
سەر
# two
دوو
# also
هەروەها
# from/that
لەو
# makes/does
دەکات
# some
چەند
# every
هەر

# demonstratives
# that
ئەو
# this
ئەم

# personal pronouns
# I
من
# we
ئێمە
# you
تۆ
# you
ئێوە
# he/she/it
ئەو
# they
ئەوان

# prepositions
# to/with/by
بە
پێ
# without
بەبێ
# along with/while/during
بەدەم
# in the opinion of
بەلای
# according to
بەپێی
# before
بەرلە
# in the direction of
بەرەوی
# in front of/toward
بەرەوە
# before/in the face of
بەردەم
# without
بێ
# except for
بێجگە
# for
بۆ
# on/in
دە
تێ
# with
دەگەڵ
# after
دوای
# except for/aside from
جگە
# in/from
لە
لێ
# in front of/before/because of
لەبەر
# between/among
لەبەینی
# concerning/about
لەبابەت
# concerning
لەبارەی
# instead of
لەباتی
# beside
لەبن
# instead of
لەبرێتی
# behind
لەدەم
# with/together with
لەگەڵ
# by
لەلایەن
# within
لەناو
# between/among
لەنێو
# for the sake of
لەپێناوی
# with respect to
لەرەوی
# by means of/for
لەرێ
# for the sake of
لەرێگا
# on/on top of/according to
لەسەر
# under
لەژێر
# between/among
ناو
# between/among
نێوان
# after
پاش
# before
پێش
# like
وەک
`)

Functions

func NormalizerFilterConstructor

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)

func StemmerFilterConstructor

func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)

func StopTokenFilterConstructor

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)

func TokenMapConstructor

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error)

Types

type SoraniNormalizeFilter

// SoraniNormalizeFilter is a field-less type: it carries no configuration or
// state of its own. NewSoraniNormalizeFilter returns a pointer to one, and a
// Filter method is declared on the pointer receiver.
type SoraniNormalizeFilter struct{}

func NewSoraniNormalizeFilter

func NewSoraniNormalizeFilter() *SoraniNormalizeFilter

func (*SoraniNormalizeFilter) Filter

type SoraniStemmerFilter

// SoraniStemmerFilter is a field-less type: it carries no configuration or
// state of its own. NewSoraniStemmerFilter returns a pointer to one, and a
// Filter method is declared on the pointer receiver.
type SoraniStemmerFilter struct{}

func NewSoraniStemmerFilter

func NewSoraniStemmerFilter() *SoraniStemmerFilter

func (*SoraniStemmerFilter) Filter

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL