Documentation ¶
Overview ¶
Package txt provides text and linguistics related functionality.
Copyright (c) 2018 - 2024 PhotoPrism UG. All rights reserved.
This program is free software: you can redistribute it and/or modify it under Version 3 of the GNU Affero General Public License (the "AGPL"): <https://docs.photoprism.app/license/agpl> This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. The AGPL is supplemented by our Trademark and Brand Guidelines, which describe how our Brand Assets may be used: <https://www.photoprism.app/trademark>
Feel free to send an email to hello@photoprism.app if you have questions, want to support our work, or just want to say hello.
Additional information can be found in our Developer Guide: <https://docs.photoprism.app/developer-guide/>
Index ¶
- Constants
- Variables
- func AddToWords(existing []string, words string) []string
- func AppendName(s, n string) string
- func Bool(s string) bool
- func Clip(s string, size int) string
- func ContainsASCIILetters(s string) bool
- func ContainsLetters(s string) bool
- func ContainsNumber(s string) bool
- func ContainsSymbols(s string) bool
- func CountryCode(s string) (code string)
- func DateFromFilePath(s string) (result time.Time)
- func DateTime(t *time.Time) string
- func DateTimeDefault(s string) bool
- func Empty(s string) bool
- func EmptyDateTime(s string) bool
- func ExpandYear(s string) int
- func FileTitle(s string) string
- func FilenameKeywords(s string) (results []string)
- func FilenameWords(s string) (results []string)
- func Float(s string) float64
- func Float32(s string) float32
- func FloatRange(s string, min, max float64) (start float64, end float64, err error)
- func Int(s string) int
- func Int64(s string) int64
- func IntRange(s string, min, max int) (start int, end int, err error)
- func IntVal(s string, min, max, def int) (i int)
- func Is(rangeTab *unicode.RangeTable, s string) bool
- func IsASCII(s string) bool
- func IsFloat(s string) bool
- func IsLatin(s string) bool
- func IsPosInt(s string) bool
- func IsTime(s string) bool
- func IsUInt(s string) bool
- func JoinNames(names []string, shorten bool) (result string)
- func Keywords(s string) (results []string)
- func LogParam(s string) string
- func LogParamLower(s string) string
- func MergeWords(w1, w2 string) string
- func NTimes(n int) string
- func NameKeywords(names, aliases string) (results []string)
- func New(s string) bool
- func No(s string) bool
- func NormalizeUtcOffset(s string) string
- func NotEmpty(s string) bool
- func Numeric(s string) string
- func ParseTime(s, timeZone string) (t time.Time)
- func QueryTooShort(q string) bool
- func Quote(text string) string
- func QuoteLower(text string) string
- func RemoveFromWords(words []string, remove string) (results []string)
- func ReplaceSpaces(s string, char string) string
- func SearchTerms(s string) map[string]bool
- func Shorten(s string, size int, suffix string) string
- func Slug(s string) string
- func SlugToTitle(s string) string
- func SortCaseInsensitive(words []string)
- func Spaced(s string) string
- func StopwordsOnly(s string) bool
- func StripOr(s string) string
- func TimeOffset(utcOffset string) (seconds int, err error)
- func TimeZone(offset string) *time.Location
- func Title(s string) string
- func UInt(s string) uint
- func UniqueKeywords(s string) (results []string)
- func UniqueNames(names []string) (result []string)
- func UniqueWords(words []string) (results []string)
- func UnixTime(t int64) string
- func UnknownWord(s string) bool
- func UpperFirst(str string) string
- func UtcOffset(local, utc time.Time, offset string) string
- func Words(s string) (results []string)
- func Year(s string) int
- func Yes(s string) bool
- type LookupTable
- type LookupTableMap
- type Name
Constants ¶
const ( Ellipsis = "…" ClipCountry = 2 ClipRole = 32 ClipPasscode = 36 ClipKeyword = 40 ClipIP = 48 ClipRealm = 64 ClipUsername = 64 ClipPassword = 72 ClipSlug = 80 ClipCategory = 100 ClipTokenName = 128 ClipDefault = 160 ClipName = 160 ClipLongName = 200 ClipEmail = 255 ClipPath = 500 ClipComment = 512 ClipURL = 512 ClipLog = 512 ClipFlags = 767 ClipShortText = 1024 ClipText = 2048 ClipLongText = 4096 )
const ( MonthMin = 1 MonthMax = 12 DayMin = 1 DayMax = 31 HourMin = 0 HourMax = 24 MinMin = 0 MinMax = 59 SecMin = 0 SecMax = 59 )
const ( EnOr = "or" EnAnd = "and" EnWith = "with" EnIn = "in" EnAt = "at" EnNew = "new" )
const ( EmptyString = "" Space = " " Or = "|" And = "&" )
const OneYear = time.Hour * 24 * 365
OneYear represents a duration of 365 days.
Variables ¶
var ( YearMin = 1970 YearMinShort = 90 YearMax = time.Now().Add(OneYear * 3).Year() YearShort = Int(time.Now().Format("06")) )
var ContainsNumberRegexp = regexp.MustCompile("\\d+")
var Countries = map[string]string{}/* 468 elements not displayed */
var CountryWordsRegexp = regexp.MustCompile("[\\p{L}]{2,}")
var DateIntRegexp = regexp.MustCompile("\\d{1,4}")
var DatePathRegexp = regexp.MustCompile("\\D\\d{4}/\\d{1,2}/?\\d*")
var DateRegexp = regexp.MustCompile("\\D\\d{4}[\\-_]\\d{2}[\\-_]\\d{2,}")
var DateTimeRegexp = regexp.MustCompile("\\D\\d{2,4}[\\-_]\\d{2}[\\-_]\\d{2}.{1,4}\\d{2}\\D\\d{2}\\D\\d{2,}")
var DateWhatsAppRegexp = regexp.MustCompile("(?:IMG|VID)-(?P<year>\\d{4})(?P<month>\\d{2})(?P<day>\\d{2})-WA")
var ExifDateTimeMatch = make(map[string]int)
var ExifDateTimeRegexp = regexp.MustCompile("((?P<year>\\d{4})|\\D{4})\\D((?P<month>\\d{2})|\\D{2})\\D((?P<day>\\d{2})|\\D{2})\\D((?P<h>\\d{2})|\\D{2})\\D((?P<m>\\d{2})|\\D{2})\\D((?P<s>\\d{2})|\\D{2})(\\.(?P<subsec>\\d+))?(?P<z>\\D)?(?P<zh>\\d{2})?\\D?(?P<zm>\\d{2})?")
var FileTitleRegexp = regexp.MustCompile("[\\p{L}\\-,':&+!?!]{1,}|( [&+] )?")
var FilenameKeywordsRegexp = regexp.MustCompile("[\\p{L}]{1,}")
var IsDateRegexp = regexp.MustCompile("\\d{4}[\\-_]?\\d{2}[\\-_]?\\d{2}")
var IsDateTimeRegexp = regexp.MustCompile("\\d{4}[\\-_]?\\d{2}[\\-_]?\\d{2}.{1,4}\\d{2}\\D?\\d{2}\\D?\\d{2}")
var IsNameSuffix map[string]bool
var IsNameTitle map[string]bool
var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-']{1,}")
var Months = [...]string{
"Unknown",
"January",
"February",
"March",
"April",
"May",
"June",
"July",
"August",
"September",
"October",
"November",
"December",
}
Months contains all month names in English.
var NameSuffixes = []string{"esq", "esquire", "jr", "jnr", "sr", "snr", "2", "ii", "iii", "iv",
"v", "clu", "chfc", "cfp", "md", "phd", "j.d.", "ll.m.", "m.d.", "d.o.", "d.c.",
"p.c.", "ph.d."}
var NameTitles = []string{}/* 198 elements not displayed */
var ShortWords = map[string]bool{}/* 2602 elements not displayed */
ShortWords contains a list of words up to 3 letters for full-text indexing and title generation.
var SmallWords = map[string]bool{ "a": true, "an": true, "as": true, "at": true, "by": true, "in": true, "of": true, "on": true, "or": true, "up": true, "to": true, "and": true, "but": true, "for": true, "nor": true, "the": true, "from": true, "with": true, "de": true, "des": true, "en": true, "le": true, "la": true, "van": true, "zu": true, "ab": true, "um": true, "bei": true, "aus": true, "das": true, "der": true, "dem": true, "mir": true, "auf": true, "ist": true, "und": true, "uns": true, "von": true, "für": true, "ein": true, "eine": true, "sind": true, "sein": true, "auch": true, "mich": true, "oben": true, "nach": true, "über": true, "ohne": true, "eines": true, "nicht": true, "davor": true, "unter": true, "neben": true, "gegen": true, "viele": true, "trotz": true, "warst": true, "waren": true, "sehen": true, "später": true, "werden": true, "werdet": true, "dessen": true, "gesehen": true, "abseits": true, "entlang": true, "sichtbar": true, "entgegen": true, "zwischen": true, "oberhalb": true, "unterhalb": true, "bezüglich": true, "unsichtbar": true, "einschließlich": true, }
var SpecialWords = map[string]string{}/* 248 elements not displayed */
var StatesAU = LookupTable{
"Qld": "Queensland",
"QLD": "Queensland",
"NSW": "New South Wales",
"Vic": "Victoria",
"VIC": "Victoria",
"VI": "Victoria",
"ACT": "Australian Capital Territory",
"JBT": "Jervis Bay Territory",
"TAS": "Tasmania",
"WA": "Western Australia",
"NT": "Northern Territory",
"SA": "South Australia",
"AQ": "Australian Antarctic Territory",
"CX": "Christmas Island",
"CC": "Cocos Islands",
"HM": "Heard Island and McDonald Islands",
"NF": "Norfolk Island",
"AUS": "",
}
StatesAU maps common abbreviations for Australian states and territories.
var StatesBR = LookupTable{
"AC": "Acre",
"AL": "Alagoas",
"AM": "Amazonas",
"AP": "Amapá",
"BA": "Bahia",
"CE": "Ceará",
"DF": "Distrito Federal",
"ES": "Espírito Santo",
"GO": "Goiás",
"MA": "Maranhão",
"MG": "Minas Gerais",
"MS": "Mato Grosso do Sul",
"MT": "Mato Grosso",
"PA": "Pará",
"PB": "Paraíba",
"PE": "Pernambuco",
"PI": "Piauí",
"PR": "Paraná",
"RJ": "Rio de Janeiro",
"RN": "Rio Grande do Norte",
"RO": "Rondônia",
"RR": "Roraima",
"RS": "Rio Grande do Sul",
"SC": "Santa Catarina",
"SE": "Sergipe",
"SP": "São Paulo",
"TO": "Tocantins",
}
StatesBR maps common abbreviations for Brazilian states.
var StatesByCountry = LookupTableMap{ "au": StatesAU, "br": StatesBR, "ca": StatesCA, "de": StatesDE, "fr": StatesFR, "nz": StatesNZ, "us": StatesUS, }
StatesByCountry maps state names by country code.
var StatesCA = LookupTable{
"AB": "Alberta",
"BC": "British Columbia",
"NB": "New Brunswick",
"NL": "Newfoundland and Labrador",
"NS": "Nova Scotia",
"NT": "Northwest Territories",
"NU": "Nunavut",
"MB": "Manitoba",
"ON": "Ontario",
"PE": "Prince Edward Island",
"QC": "Quebec",
"SK": "Saskatchewan",
"YT": "Yukon",
}
StatesCA maps common abbreviations for Canadian provinces and territories.
var StatesDE = LookupTable{
"BW": "Baden-Württemberg",
"Ba-Wü": "Baden-Württemberg",
"Baden-Wurttemberg": "Baden-Württemberg",
"BY": "Bayern",
"Bavaria": "Bayern",
"BE": "Berlin",
"BER": "Berlin",
"BB": "Brandenburg",
"HB": "Bremen",
"HH": "Hamburg",
"HE": "Hessen",
"NI": "Niedersachsen",
"NDS": "Niedersachsen",
"Lower Saxony": "Niedersachsen",
"Lower-Saxony": "Niedersachsen",
"MV": "Mecklenburg-Vorpommern",
"NW": "Nordrhein-Westfalen",
"NRW": "Nordrhein-Westfalen",
"North Rhine-Westphalia": "Nordrhein-Westfalen",
"RP": "Rheinland-Pfalz",
"RLP": "Rheinland-Pfalz",
"Rhineland-Palatinate": "Rheinland-Pfalz",
"SL": "Saarland",
"SN": "Sachsen",
"Saxony": "Sachsen",
"ST": "Sachsen-Anhalt",
"Saxony-Anhalt": "Sachsen-Anhalt",
"Saxony Anhalt": "Sachsen-Anhalt",
"Sachsen Anhalt": "Sachsen-Anhalt",
"SH": "Schleswig-Holstein",
"TH": "Thüringen",
"Thuringia": "Thüringen",
"Thuringen": "Thüringen",
}
StatesDE maps common abbreviations for German states.
var StatesFR = LookupTable{
"France métropolitaine": "",
}
StatesFR maps common abbreviations for French states.
var StatesNZ = LookupTable{
"AUK": "Auckland",
"BOP": "Bay of Plenty",
"CAN": "Canterbury",
"GIS": "Gisborne",
"HKB": "Hawke's Bay",
"MBH": "Marlborough",
"MWT": "Manawatu-Wanganui",
"NSN": "Nelson",
"NTL": "Northland",
"OTA": "Otago",
"STL": "Southland",
"TAS": "Tasman",
"TKI": "Taranaki",
"WKO": "Waikato",
"WGN": "Wellington",
"WTC": "West Coast",
"CIT": "Chatham Islands",
"NZ": "",
}
StatesNZ maps common abbreviations for provinces and territories in New Zealand.
var StatesUS = LookupTable{
"AL": "Alabama",
"AK": "Alaska",
"AS": "American Samoa",
"AZ": "Arizona",
"AR": "Arkansas",
"CA": "California",
"CO": "Colorado",
"CT": "Connecticut",
"DE": "Delaware",
"DC": "District of Columbia",
"FM": "Federated States of Micronesia",
"FL": "Florida",
"GA": "Georgia",
"GU": "Guam",
"HI": "Hawaii",
"ID": "Idaho",
"IL": "Illinois",
"IN": "Indiana",
"IA": "Iowa",
"KS": "Kansas",
"KY": "Kentucky",
"LA": "Louisiana",
"MB": "Manitoba",
"ME": "Maine",
"MH": "Marshall Islands",
"MD": "Maryland",
"MA": "Massachusetts",
"MI": "Michigan",
"MN": "Minnesota",
"MS": "Mississippi",
"MO": "Missouri",
"MT": "Montana",
"NE": "Nebraska",
"NV": "Nevada",
"NH": "New Hampshire",
"NJ": "New Jersey",
"NM": "New Mexico",
"NY": "New York",
"NC": "North Carolina",
"ND": "North Dakota",
"NS": "Nova Scotia",
"MP": "Northern Mariana Islands",
"OH": "Ohio",
"OK": "Oklahoma",
"OR": "Oregon",
"PW": "Palau",
"PA": "Pennsylvania",
"PR": "Puerto Rico",
"P.R": "Puerto Rico",
"RI": "Rhode Island",
"SK": "Saskatchewan",
"SC": "South Carolina",
"SD": "South Dakota",
"TN": "Tennessee",
"TX": "Texas",
"UT": "Utah",
"VT": "Vermont",
"VI": "Virgin Islands",
"VA": "Virginia",
"WA": "Washington",
"WV": "West Virginia",
"WI": "Wisconsin",
"WY": "Wyoming",
}
StatesUS maps common abbreviations for US states.
var StopWords = map[string]bool{}/* 4683 elements not displayed */
StopWords contains a list of stopwords for full-text indexing.
var TitlesAndRanks = map[string]bool{ "emperor": true, "caliph": true, "king": true, "kings": true, "shahanshah": true, "padishah": true, "sultan": true, "chakravarti": true, "chhatrapati": true, "samrat": true, "khagan": true, "high": true, "great": true, "maharaja": true, "beg": true, "khan": true, "amir": true, "al-umara": true, "bek": true, "malik": true, "emir": true, "hakim": true, "sharif": true, "shah": true, "shirvanshah": true, "raja": true, "dey": true, "duke": true, "khedive": true, "nawab": true, "wāli": true, "nizam": true, "crown": true, "prince": true, "shahzada": true, "mirza": true, "awabzada": true, "yuvraj": true, "vali": true, "mir": true, "ikhshid": true, "pasha": true, "thakur": true, "babu": true, "saheb": true, "sardar": true, "rajkumar": true, "sahibzada": true, "nawabzada": true, "earl": true, "mankari": true, "dewan": true, "rao": true, "bahadur": true, "rai": true, "beylerbey": true, "atabeg": true, "viscount": true, "zamindar": true, "sahib": true, "bey": true, "begum": true, "begzada": true, "baron": true, "lala": true, "agha": true, "hazinedar": true, "queen": true, "pope": true, "marquess": true, "baroness": true, "princess": true, "lord": true, "lady": true, "supreme": true, "leader": true, "chief": true, "excellency": true, "doctor": true, "president": true, "marshal": true, "conqueror": true, "general": true, "admiral": true, "guide": true, "executive": true, "bishop": true, "highness": true, "his": true, "her": true, "monsignor": true, "sire": true, }
TitlesAndRanks contains a list of name titles and ranks in lowercase, see https://en.wikipedia.org/wiki/List_of_titles.
var UnknownCountryCode = "zz"
var UnknownStateCode = "zz"
var YearRegexp = regexp.MustCompile("\\d{4,5}")
Functions ¶
func AddToWords ¶
AddToWords adds words to a string slice and returns the sorted result.
func AppendName ¶
AppendName appends a name to an existing name.
func Clip ¶
Clip shortens a string to the given number of runes, and removes all leading and trailing white space.
func ContainsASCIILetters ¶
ContainsASCIILetters reports whether the string only contains ASCII characters, excluding whitespace, numbers, and punctuation marks.
func ContainsLetters ¶
ContainsLetters reports whether the string only contains letters.
func ContainsNumber ¶
ContainsNumber returns true if string contains a number.
func ContainsSymbols ¶
ContainsSymbols reports whether the string only contains symbolic characters.
func CountryCode ¶
CountryCode tries to find a matching country code for a given string, e.g. from a file or directory name.
func DateFromFilePath ¶
DateFromFilePath returns the string as time, or the zero time instant in case it cannot be converted.
func DateTimeDefault ¶
DateTimeDefault tests if the datetime string is not empty and not a default value.
func EmptyDateTime ¶
EmptyDateTime tests if the string is empty or matches an unknown time pattern.
func ExpandYear ¶
ExpandYear converts a string to a year and expands two-digit years if possible.
func FileTitle ¶
FileTitle returns the string with the first characters of each word converted to uppercase.
func FilenameKeywords ¶
FilenameKeywords returns a slice of keywords without stopwords.
func FilenameWords ¶
FilenameWords returns a slice of words with at least 3 characters from a string ("ile", "france").
func FloatRange ¶
FloatRange parses a string as floating point number range and returns an error if it's not a valid range.
func IntRange ¶
IntRange parses a string as integer range and returns an error if it's not a valid range.
func Is ¶
func Is(rangeTab *unicode.RangeTable, s string) bool
Is reports whether all runes of the string are in the specified range table.
func LogParam ¶
LogParam sanitizes strings created from user input in response to the log4j debacle.
func LogParamLower ¶
LogParamLower sanitizes strings created from user input and converts them to lowercase.
func MergeWords ¶
MergeWords merges two keyword strings separated by ", ".
func NTimes ¶
NTimes converts an integer to a string in the format "n times" or returns an empty string if n is 0.
func NameKeywords ¶
NameKeywords returns a list of unique, lowercase keywords based on a person's names and aliases.
func NormalizeUtcOffset ¶
NormalizeUtcOffset returns a normalized UTC time offset string.
func QueryTooShort ¶
QueryTooShort tests if a search query is too short.
func QuoteLower ¶
QuoteLower converts a string to lowercase and adds quotation marks if needed.
func RemoveFromWords ¶
RemoveFromWords removes words from a string slice and returns the sorted result.
func ReplaceSpaces ¶
ReplaceSpaces replaces all spaces with another string.
func SearchTerms ¶
SearchTerms returns a bool map with all terms as key.
func SortCaseInsensitive ¶
func SortCaseInsensitive(words []string)
SortCaseInsensitive performs a case-insensitive slice sort.
func StopwordsOnly ¶
StopwordsOnly tests if the string contains stopwords only.
func TimeOffset ¶
TimeOffset returns the UTC time offset in seconds or an error if it is invalid.
func Title ¶
Title returns the string with the first characters of each word converted to uppercase.
func UniqueKeywords ¶
UniqueKeywords returns a slice of unique and sorted keywords without stopwords.
func UniqueNames ¶
UniqueNames removes exact duplicates from a list of strings without changing their order.
func UniqueWords ¶
UniqueWords sorts and filters a string slice for unique words.
func UnknownWord ¶
UnknownWord returns true if the string does not seem to be a real word.
func UpperFirst ¶
UpperFirst returns the string with the first character converted to uppercase.
func Words ¶
Words returns a slice of words with at least 3 characters from a string; dashes count as characters ("ile-de-france").
Types ¶
type LookupTableMap ¶
type LookupTableMap map[string]LookupTable
LookupTableMap represents a map of string lookup tables.
Source Files ¶
- clip.go
- compare.go
- contains.go
- countries.go
- date.go
- datetime.go
- datetime_filepath.go
- datetime_year.go
- empty.go
- en.go
- file_title.go
- float.go
- int.go
- is.go
- log.go
- lookuptable.go
- names.go
- names_lists.go
- names_parser.go
- ntimes.go
- numeric.go
- places.go
- query.go
- quote.go
- search.go
- separator.go
- shortwords.go
- slug.go
- smallwords.go
- specialwords.go
- states.go
- stopwords.go
- timezone.go
- title.go
- titles.go
- txt.go
- upperfirst.go
- words.go