Documentation ¶
Index ¶
- Variables
- func CashTag(text string) []string
- func CashTagIndex(text string) [][]int
- func Email(text string) []string
- func EmailIndex(text string) [][]int
- func Emoticon(text string) []string
- func EmoticonIndex(text string) [][]int
- func EmoticonWordPunct(text string) []string
- func EmoticonWordPunctIndex(text string) [][]int
- func Filter(s []string, fn func(string) bool) []string
- func HashTag(text string) []string
- func HashTagIndex(text string) [][]int
- func MatchAny(text string, patterns ...*regexp.Regexp) bool
- func Mention(text string) []string
- func MentionIndex(text string) [][]int
- func Split(text string, filters ...func(t string) [][]int) []string
- func SplitIndex(text string, filters ...func(t string) [][]int) [][]int
- func SplitNatural(text string) []string
- func URL(text string) []string
- func URLIndex(text string) [][]int
- func WordPunct(text string) []string
- func WordPunctIndex(text string) [][]int
Constants ¶
This section is empty.
Variables ¶
var CashTagRegexp = regexp.MustCompile("(?i)\\$([A-Za-z]+[A-Za-z0-9_]*)")
CashTagRegexp will look for cashtags
var EmailRegexp = regexp.MustCompile("(?i)[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]+\\b")
EmailRegexp will look for email expressions within text
var EmoticonWordPunctuationRegexp = regexp.MustCompile(combinedWordPunctuationPattern)
EmoticonWordPunctuationRegexp is a combined emoticons and word punctuation data tokenization pattern
var EmoticonsRegexp = regexp.MustCompile("(?i)" + emoticonsPattern)
EmoticonsRegexp is a pattern for tokenizing on various emoticons
var HTTPWWWRegexp = regexp.MustCompile("(?i)^(?:https?://){0,1}(?:www\\.){0,1}")
HTTPWWWRegexp will look for an optional http:// or https:// scheme and an optional www. prefix at the start of the text
var HashTagRegexp = regexp.MustCompile("(?i)#([A-Za-zÀ-ÿ0-9\\-_&;]+)")
HashTagRegexp will look for hashtags
var MentionRegexp = regexp.MustCompile("(?i)@([A-Za-zÀ-ÿ0-9\\-_&;]+)")
MentionRegexp will look for mentions
var NumericRegexp = regexp.MustCompile("(?i)^\\d+\\%?")
NumericRegexp is a quick pattern that matches a run of digits at the start of the text, optionally followed by a percent sign
var RepeatedPunctRegexp = regexp.MustCompile("(?i)\\%|(?:[\\!\\?]+)|\\!+|\\.+|;+|,+|:+|\\'+|\\\"+|-+|\\?+|\\&+|\\*+|\\(+|\\)+|_+|\\++|\\/+|\\\\+")
RepeatedPunctRegexp is a simple expression for repeated punctuation patterns
var URLRegexp = regexp.MustCompile("(?i)\\b(?:(?:https?)://|www\\.|ftp\\.)(?:\\([-A-Z0-9+&@#/%=~_|$?!:,.]*\\)|[-A-Z0-9+&@#/%=~_|$?!:,.])*(?:\\([-A-Z0-9+&@#/%=~_|$?!:,.'\"\"]*\\)?|[A-Z0-9+&@#/%=~_'\"\"|$])")
URLRegexp will look for urls
var WordPunctuationRegexp = regexp.MustCompile("(?i)" + wordPunctuationPattern)
WordPunctuationRegexp is a popular pattern for tokenizing on various types of data
Functions ¶
func CashTagIndex ¶
CashTagIndex to split and return the indexes for the cashtag regex pattern
func EmailIndex ¶
EmailIndex to split and return the indexes for the email regex pattern
func Emoticon ¶
Emoticon will split and return the strings of all the found emoticons using the regex pattern for emoticons
func EmoticonIndex ¶
EmoticonIndex will split and return the indexes of all the found emoticons using the regex pattern for emoticons
func EmoticonWordPunct ¶
EmoticonWordPunct to split and return strings for the combined emoticon and word punctuation regular expression patterns
func EmoticonWordPunctIndex ¶
EmoticonWordPunctIndex to split and return the indexes for the combined emoticon and word punctuation regular expression patterns
func HashTagIndex ¶
HashTagIndex to split and return the indexes for the hashtag regex pattern
func MentionIndex ¶
MentionIndex to split and return the indexes for the mention regex pattern
func Split ¶
Split to return the strings by passing the text through a pre-filter and then a post-filter. The pre-filter is executed first, and tokenization then continues on the text surrounding its matches (such as in Email & WordPunct)
func SplitIndex ¶
SplitIndex to return the indices of all the tokens that passed through the pre- and post-filters. The pre-filter is executed first, and the post-filter is then executed on the text surrounding the tokens found by the pre-filter.
func SplitNatural ¶
SplitNatural to split and return the list of strings tokenized by all common word patterns
func WordPunctIndex ¶
WordPunctIndex to split and return the indexes for matches with the word punctuation regular expression
Types ¶
This section is empty.