golem

package module
v0.0.0-...-0bbe643 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 5, 2019 License: MIT Imports: 8 Imported by: 0

README

GoLem

This project is a dictionary-based lemmatizer written in pure Go, without external dependencies.

What?

A lemmatizer is a tool that finds the base form of words.

Lang Input Output
English aligning align
Swedish sprungit springa
French abattaient abattre

It's based on the dictionaries found on lexiconista.com, which are available under the Open Database License. This project would not be feasible without them.

Languages

At the moment I have added English, Swedish, French, Spanish and German. Adding another language should be no more trouble than getting the dictionary for that language, and some of these are already available on lexiconista. Please let me know if there is a language you would like to see in here, or fork the project and create a pull request.

Basic usage
package main

import (
	"github.com/aaaton/golem"
)

// main builds an English lemmatizer and checks that "Abducting" is reduced
// to "abduct", panicking if construction fails or the lemma differs.
func main() {
	// Both "en" and "english" select the English lemmatizer.
	lemmatizer, err := golem.New("english")
	if err != nil {
		panic(err)
	}
	if got := lemmatizer.Lemma("Abducting"); got != "abduct" {
		panic("The output is not what is expected!")
	}
}

Documentation

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Lemmatizer

type Lemmatizer struct {
	// contains filtered or unexported fields
}

Lemmatizer is the key to lemmatizing a word in a language

func New

func New(locale string) (*Lemmatizer, error)

New produces a new Lemmatizer

func NewVoid

func NewVoid() (*Lemmatizer, error)

NewVoid produces a void Lemmatizer

func (*Lemmatizer) InDict

func (l *Lemmatizer) InDict(word string) bool

InDict checks if a certain word is in the dictionary

Example
package main

import (
	"fmt"

	"github.com/axamon/golem"
)

// Package-level lemmatizer handles, one per language. Each starts out as a
// freshly allocated zero-value Lemmatizer.
// NOTE(review): this listing does not show a dictionary being loaded into
// any of these values — confirm where they are populated.
var (
	en = new(golem.Lemmatizer) // English
	fr = new(golem.Lemmatizer) // French
	ge = new(golem.Lemmatizer) // German
	it = new(golem.Lemmatizer) // Italian
	sp = new(golem.Lemmatizer) // Spanish
	sw = new(golem.Lemmatizer) // Swedish
)

// exampleDataInDict is the input table for the InDict example: each row
// names a language, a word to look up, and the answer the lookup is
// expected to give.
var exampleDataInDict = []struct {
	language string // language whose dictionary is consulted
	word     string // word handed to InDict
	result   bool   // expected answer; the example prints InDict's result and does not read this field
}{
	{language: "italian", word: "armadio", result: true},
	{language: "italian", word: "ammaccabanane", result: false},
	{language: "swedish", word: "Avtalet", result: true},
	{language: "swedish", word: "Avtalt", result: false},
}

func main() {
	for _, element := range exampleDataInDict {
		var l *golem.Lemmatizer = new(golem.Lemmatizer)
		switch element.language {
		case "italian":
			l = it
		case "english":
			l = en
		case "swedish":
			l = sw
		case "french":
			l = fr
		case "german":
			l = ge
		case "spanish":
			l = sp
		}
		fmt.Println(l.InDict(element.word))
	}
}
Output:

true
false
true
false

func (*Lemmatizer) Lemma

func (l *Lemmatizer) Lemma(word string) string

Lemma gets one of the base forms of a word

Example
package main

import (
	"fmt"
	"log"

	"github.com/axamon/golem"
)

// exampleDataLemma is the input table for the Lemma example: each row pairs
// a language with a word to be lemmatized in that language.
var exampleDataLemma = []struct {
	language string // language name passed to golem.New
	word     string // word passed to Lemma
}{
	{language: "english", word: "agreed"},
	{language: "italian", word: "armadi"},
	{language: "swedish", word: "Avtalet"},
}

// main walks exampleDataLemma, builds a lemmatizer for each row's language
// with golem.New, and prints the lemma of the row's word. A construction
// error ends the program through log.Fatal.
func main() {
	for _, entry := range exampleDataLemma {
		lemmatizer, err := golem.New(entry.language)
		if err != nil {
			log.Fatal(err)
		}
		lemma := lemmatizer.Lemma(entry.word)
		fmt.Println(lemma)
	}
}
Output:

agree
armadio
avtal

func (*Lemmatizer) LemmaLower

func (l *Lemmatizer) LemmaLower(word string) string

LemmaLower gets one of the base forms of a lower case word

func (*Lemmatizer) Lemmas

func (l *Lemmatizer) Lemmas(word string) []string

Lemmas gets all the base forms of a word

Example
package main

import (
	"fmt"

	"github.com/axamon/golem"
)

// Package-level lemmatizer handles, one per language. Each starts out as a
// freshly allocated zero-value Lemmatizer.
// NOTE(review): this listing does not show a dictionary being loaded into
// any of these values — confirm where they are populated.
var (
	en = new(golem.Lemmatizer) // English
	fr = new(golem.Lemmatizer) // French
	ge = new(golem.Lemmatizer) // German
	it = new(golem.Lemmatizer) // Italian
	sp = new(golem.Lemmatizer) // Spanish
	sw = new(golem.Lemmatizer) // Swedish
)

// exampleDataLemmas is the input table for the Lemmas example: each row
// names a language, a word, and a list of lemmas for that word.
//
// NOTE(review): the example prints whatever Lemmas returns and never reads
// the result column. The documented output lists four lemmas (solare,
// solere, solo, sole) while this row lists two, so the column looks stale —
// confirm which is correct.
var exampleDataLemmas = []struct {
	language string   // language whose dictionary is consulted
	word     string   // word handed to Lemmas
	result   []string // lemmas recorded for the word; not read by the example
}{
	{language: "italian", word: "soli", result: []string{"sole", "solo"}},
}

// main prints, one per line, every lemma that Lemmas returns for each row
// of exampleDataLemmas.
//
// The lemmatizer for a row is built with golem.New from the row's language
// rather than taken from a zero-value Lemmatizer, so the program does not
// rely on any initialisation outside this listing. It panics if golem.New
// returns an error.
func main() {
	for _, element := range exampleDataLemmas {
		l, err := golem.New(element.language)
		if err != nil {
			panic(err)
		}
		for _, lemma := range l.Lemmas(element.word) {
			fmt.Println(lemma)
		}
	}
}
Output:

solare
solere
solo
sole

Directories

Path Synopsis
IT

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL