Documentation
¶
Overview ¶
Package cld2 implements language detection using the Compact Language Detector.
This package includes the relevant sources from the cld2 project, so it doesn't require any external dependencies. For more information about CLD2, see https://github.com/CLD2Owners/cld2
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type Estimate ¶
// Estimate is a single language estimate.
type Estimate struct {
// Language is the language this estimate refers to.
Language Language
Percent int // text percentage 0..100 of the top 3 languages.
// NormScore is the internal language score expressed as a ratio to the
// normal score for real text in that language.
// Scores close to 1.0 indicate normal text, while scores far away
// from 1.0 indicate badly-skewed text or gibberish.
NormScore float64
}
Estimate is a single language estimate.
type Language ¶
// Language is a single language. Note that the zero value is ENGLISH.
type Language uint16
Language is a single language. Note that the zero value is "ENGLISH".
const ( ENGLISH Language = 0 // en DANISH Language = 1 // da DUTCH Language = 2 // nl FINNISH Language = 3 // fi FRENCH Language = 4 // fr GERMAN Language = 5 // de HEBREW Language = 6 // iw ITALIAN Language = 7 // it JAPANESE Language = 8 // ja KOREAN Language = 9 // ko NORWEGIAN Language = 10 // no POLISH Language = 11 // pl PORTUGUESE Language = 12 // pt RUSSIAN Language = 13 // ru SPANISH Language = 14 // es SWEDISH Language = 15 // sv CHINESE Language = 16 // zh CZECH Language = 17 // cs GREEK Language = 18 // el ICELANDIC Language = 19 // is LATVIAN Language = 20 // lv LITHUANIAN Language = 21 // lt ROMANIAN Language = 22 // ro HUNGARIAN Language = 23 // hu ESTONIAN Language = 24 // et TG_UNKNOWN_LANGUAGE Language = 25 // xxx UNKNOWN_LANGUAGE Language = 26 // un BULGARIAN Language = 27 // bg CROATIAN Language = 28 // hr SERBIAN Language = 29 // sr IRISH Language = 30 // ga GALICIAN Language = 31 // gl TAGALOG Language = 32 // tl TURKISH Language = 33 // tr UKRAINIAN Language = 34 // uk HINDI Language = 35 // hi MACEDONIAN Language = 36 // mk BENGALI Language = 37 // bn INDONESIAN Language = 38 // id LATIN Language = 39 // la MALAY Language = 40 // ms MALAYALAM Language = 41 // ml WELSH Language = 42 // cy NEPALI Language = 43 // ne TELUGU Language = 44 // te ALBANIAN Language = 45 // sq TAMIL Language = 46 // ta BELARUSIAN Language = 47 // be JAVANESE Language = 48 // jw OCCITAN Language = 49 // oc URDU Language = 50 // ur BIHARI Language = 51 // bh GUJARATI Language = 52 // gu THAI Language = 53 // th ARABIC Language = 54 // ar CATALAN Language = 55 // ca ESPERANTO Language = 56 // eo BASQUE Language = 57 // eu INTERLINGUA Language = 58 // ia KANNADA Language = 59 // kn PUNJABI Language = 60 // pa SCOTS_GAELIC Language = 61 // gd SWAHILI Language = 62 // sw SLOVENIAN Language = 63 // sl MARATHI Language = 64 // mr MALTESE Language = 65 // mt VIETNAMESE Language = 66 // vi FRISIAN Language = 67 // fy SLOVAK Language = 68 // sk CHINESE_T Language = 69 // 
zh-Hant FAROESE Language = 70 // fo SUNDANESE Language = 71 // su UZBEK Language = 72 // uz AMHARIC Language = 73 // am AZERBAIJANI Language = 74 // az GEORGIAN Language = 75 // ka TIGRINYA Language = 76 // ti PERSIAN Language = 77 // fa BOSNIAN Language = 78 // bs SINHALESE Language = 79 // si NORWEGIAN_N Language = 80 // nn X_81 Language = 81 // X_82 Language = 82 // XHOSA Language = 83 // xh ZULU Language = 84 // zu GUARANI Language = 85 // gn SESOTHO Language = 86 // st TURKMEN Language = 87 // tk KYRGYZ Language = 88 // ky BRETON Language = 89 // br TWI Language = 90 // tw YIDDISH Language = 91 // yi X_92 Language = 92 // SOMALI Language = 93 // so UIGHUR Language = 94 // ug KURDISH Language = 95 // ku MONGOLIAN Language = 96 // mn ARMENIAN Language = 97 // hy LAOTHIAN Language = 98 // lo SINDHI Language = 99 // sd RHAETO_ROMANCE Language = 100 // rm AFRIKAANS Language = 101 // af LUXEMBOURGISH Language = 102 // lb BURMESE Language = 103 // my KHMER Language = 104 // km TIBETAN Language = 105 // bo DHIVEHI Language = 106 // dv CHEROKEE Language = 107 // chr SYRIAC Language = 108 // syr LIMBU Language = 109 // lif ORIYA Language = 110 // or ASSAMESE Language = 111 // as CORSICAN Language = 112 // co INTERLINGUE Language = 113 // ie KAZAKH Language = 114 // kk LINGALA Language = 115 // ln X_116 Language = 116 // PASHTO Language = 117 // ps QUECHUA Language = 118 // qu SHONA Language = 119 // sn TAJIK Language = 120 // tg TATAR Language = 121 // tt TONGA Language = 122 // to YORUBA Language = 123 // yo X_124 Language = 124 // X_125 Language = 125 // X_126 Language = 126 // X_127 Language = 127 // MAORI Language = 128 // mi WOLOF Language = 129 // wo ABKHAZIAN Language = 130 // ab AFAR Language = 131 // aa AYMARA Language = 132 // ay BASHKIR Language = 133 // ba BISLAMA Language = 134 // bi DZONGKHA Language = 135 // dz FIJIAN Language = 136 // fj GREENLANDIC Language = 137 // kl HAUSA Language = 138 // ha HAITIAN_CREOLE Language = 139 // ht INUPIAK Language = 140 
// ik INUKTITUT Language = 141 // iu KASHMIRI Language = 142 // ks KINYARWANDA Language = 143 // rw MALAGASY Language = 144 // mg NAURU Language = 145 // na OROMO Language = 146 // om RUNDI Language = 147 // rn SAMOAN Language = 148 // sm SANGO Language = 149 // sg SANSKRIT Language = 150 // sa SISWANT Language = 151 // ss TSONGA Language = 152 // ts TSWANA Language = 153 // tn VOLAPUK Language = 154 // vo ZHUANG Language = 155 // za KHASI Language = 156 // kha SCOTS Language = 157 // sco GANDA Language = 158 // lg MANX Language = 159 // gv MONTENEGRIN Language = 160 // sr-ME AKAN Language = 161 // ak IGBO Language = 162 // ig MAURITIAN_CREOLE Language = 163 // mfe HAWAIIAN Language = 164 // haw CEBUANO Language = 165 // ceb EWE Language = 166 // ee GA Language = 167 // gaa HMONG Language = 168 // blu KRIO Language = 169 // kri LOZI Language = 170 // loz LUBA_LULUA Language = 171 // lua LUO_KENYA_AND_TANZANIA Language = 172 // luo NEWARI Language = 173 // new NYANJA Language = 174 // ny OSSETIAN Language = 175 // os PAMPANGA Language = 176 // pam PEDI Language = 177 // nso RAJASTHANI Language = 178 // raj SESELWA Language = 179 // crs TUMBUKA Language = 180 // tum VENDA Language = 181 // ve WARAY_PHILIPPINES Language = 182 // war X_183 Language = 183 // X_184 Language = 184 // X_185 Language = 185 // X_186 Language = 186 // X_187 Language = 187 // X_188 Language = 188 // X_189 Language = 189 // X_190 Language = 190 // X_191 Language = 191 // X_192 Language = 192 // X_193 Language = 193 // X_194 Language = 194 // X_195 Language = 195 // X_196 Language = 196 // X_197 Language = 197 // X_198 Language = 198 // X_199 Language = 199 // X_200 Language = 200 // X_201 Language = 201 // X_202 Language = 202 // X_203 Language = 203 // X_204 Language = 204 // X_205 Language = 205 // X_206 Language = 206 // X_207 Language = 207 // X_208 Language = 208 // X_209 Language = 209 // X_210 Language = 210 // X_211 Language = 211 // X_212 Language = 212 // X_213 Language = 213 // X_214 
Language = 214 // X_215 Language = 215 // X_216 Language = 216 // X_217 Language = 217 // X_218 Language = 218 // X_219 Language = 219 // X_220 Language = 220 // X_221 Language = 221 // X_222 Language = 222 // X_223 Language = 223 // X_224 Language = 224 // X_225 Language = 225 // X_226 Language = 226 // X_227 Language = 227 // X_228 Language = 228 // X_229 Language = 229 // X_230 Language = 230 // X_231 Language = 231 // X_232 Language = 232 // X_233 Language = 233 // X_234 Language = 234 // X_235 Language = 235 // X_236 Language = 236 // X_237 Language = 237 // X_238 Language = 238 // X_239 Language = 239 // X_240 Language = 240 // X_241 Language = 241 // X_242 Language = 242 // X_243 Language = 243 // X_244 Language = 244 // X_245 Language = 245 // X_246 Language = 246 // X_247 Language = 247 // X_248 Language = 248 // X_249 Language = 249 // X_250 Language = 250 // X_251 Language = 251 // X_252 Language = 252 // X_253 Language = 253 // X_254 Language = 254 // X_255 Language = 255 // X_256 Language = 256 // X_257 Language = 257 // X_258 Language = 258 // X_259 Language = 259 // X_260 Language = 260 // X_261 Language = 261 // X_262 Language = 262 // X_263 Language = 263 // X_264 Language = 264 // X_265 Language = 265 // X_266 Language = 266 // X_267 Language = 267 // X_268 Language = 268 // X_269 Language = 269 // X_270 Language = 270 // X_271 Language = 271 // X_272 Language = 272 // X_273 Language = 273 // X_274 Language = 274 // X_275 Language = 275 // X_276 Language = 276 // X_277 Language = 277 // X_278 Language = 278 // X_279 Language = 279 // X_280 Language = 280 // X_281 Language = 281 // X_282 Language = 282 // X_283 Language = 283 // X_284 Language = 284 // X_285 Language = 285 // X_286 Language = 286 // X_287 Language = 287 // X_288 Language = 288 // X_289 Language = 289 // X_290 Language = 290 // X_291 Language = 291 // X_292 Language = 292 // X_293 Language = 293 // X_294 Language = 294 // X_295 Language = 295 // X_296 Language = 296 // X_297 
Language = 297 // X_298 Language = 298 // X_299 Language = 299 // X_300 Language = 300 // X_301 Language = 301 // X_302 Language = 302 // X_303 Language = 303 // X_304 Language = 304 // X_305 Language = 305 // X_306 Language = 306 // X_307 Language = 307 // X_308 Language = 308 // X_309 Language = 309 // X_310 Language = 310 // X_311 Language = 311 // X_312 Language = 312 // X_313 Language = 313 // X_314 Language = 314 // X_315 Language = 315 // X_316 Language = 316 // X_317 Language = 317 // X_318 Language = 318 // X_319 Language = 319 // X_320 Language = 320 // X_321 Language = 321 // X_322 Language = 322 // X_323 Language = 323 // X_324 Language = 324 // X_325 Language = 325 // X_326 Language = 326 // X_327 Language = 327 // X_328 Language = 328 // X_329 Language = 329 // X_330 Language = 330 // X_331 Language = 331 // X_332 Language = 332 // X_333 Language = 333 // X_334 Language = 334 // X_335 Language = 335 // X_336 Language = 336 // X_337 Language = 337 // X_338 Language = 338 // X_339 Language = 339 // X_340 Language = 340 // X_341 Language = 341 // X_342 Language = 342 // X_343 Language = 343 // X_344 Language = 344 // X_345 Language = 345 // X_346 Language = 346 // X_347 Language = 347 // X_348 Language = 348 // X_349 Language = 349 // X_350 Language = 350 // X_351 Language = 351 // X_352 Language = 352 // X_353 Language = 353 // X_354 Language = 354 // X_355 Language = 355 // X_356 Language = 356 // X_357 Language = 357 // X_358 Language = 358 // X_359 Language = 359 // X_360 Language = 360 // X_361 Language = 361 // X_362 Language = 362 // X_363 Language = 363 // X_364 Language = 364 // X_365 Language = 365 // X_366 Language = 366 // X_367 Language = 367 // X_368 Language = 368 // X_369 Language = 369 // X_370 Language = 370 // X_371 Language = 371 // X_372 Language = 372 // X_373 Language = 373 // X_374 Language = 374 // X_375 Language = 375 // X_376 Language = 376 // X_377 Language = 377 // X_378 Language = 378 // X_379 Language = 379 // X_380 
Language = 380 // X_381 Language = 381 // X_382 Language = 382 // X_383 Language = 383 // X_384 Language = 384 // X_385 Language = 385 // X_386 Language = 386 // X_387 Language = 387 // X_388 Language = 388 // X_389 Language = 389 // X_390 Language = 390 // X_391 Language = 391 // X_392 Language = 392 // X_393 Language = 393 // X_394 Language = 394 // X_395 Language = 395 // X_396 Language = 396 // X_397 Language = 397 // X_398 Language = 398 // X_399 Language = 399 // X_400 Language = 400 // X_401 Language = 401 // X_402 Language = 402 // X_403 Language = 403 // X_404 Language = 404 // X_405 Language = 405 // X_406 Language = 406 // X_407 Language = 407 // X_408 Language = 408 // X_409 Language = 409 // X_410 Language = 410 // X_411 Language = 411 // X_412 Language = 412 // X_413 Language = 413 // X_414 Language = 414 // X_415 Language = 415 // X_416 Language = 416 // X_417 Language = 417 // X_418 Language = 418 // X_419 Language = 419 // X_420 Language = 420 // X_421 Language = 421 // X_422 Language = 422 // X_423 Language = 423 // X_424 Language = 424 // X_425 Language = 425 // X_426 Language = 426 // X_427 Language = 427 // X_428 Language = 428 // X_429 Language = 429 // X_430 Language = 430 // X_431 Language = 431 // X_432 Language = 432 // X_433 Language = 433 // X_434 Language = 434 // X_435 Language = 435 // X_436 Language = 436 // X_437 Language = 437 // X_438 Language = 438 // X_439 Language = 439 // X_440 Language = 440 // X_441 Language = 441 // X_442 Language = 442 // X_443 Language = 443 // X_444 Language = 444 // X_445 Language = 445 // X_446 Language = 446 // X_447 Language = 447 // X_448 Language = 448 // X_449 Language = 449 // X_450 Language = 450 // X_451 Language = 451 // X_452 Language = 452 // X_453 Language = 453 // X_454 Language = 454 // X_455 Language = 455 // X_456 Language = 456 // X_457 Language = 457 // X_458 Language = 458 // X_459 Language = 459 // X_460 Language = 460 // X_461 Language = 461 // X_462 Language = 462 // X_463 
Language = 463 // X_464 Language = 464 // X_465 Language = 465 // X_466 Language = 466 // X_467 Language = 467 // X_468 Language = 468 // X_469 Language = 469 // X_470 Language = 470 // X_471 Language = 471 // X_472 Language = 472 // X_473 Language = 473 // X_474 Language = 474 // X_475 Language = 475 // X_476 Language = 476 // X_477 Language = 477 // X_478 Language = 478 // X_479 Language = 479 // X_480 Language = 480 // X_481 Language = 481 // X_482 Language = 482 // X_483 Language = 483 // X_484 Language = 484 // X_485 Language = 485 // X_486 Language = 486 // X_487 Language = 487 // X_488 Language = 488 // X_489 Language = 489 // X_490 Language = 490 // X_491 Language = 491 // X_492 Language = 492 // X_493 Language = 493 // X_494 Language = 494 // X_495 Language = 495 // X_496 Language = 496 // X_497 Language = 497 // X_498 Language = 498 // X_499 Language = 499 // X_500 Language = 500 // X_501 Language = 501 // X_502 Language = 502 // X_503 Language = 503 // X_504 Language = 504 // X_505 Language = 505 // NDEBELE Language = 506 // nr X_BORK_BORK_BORK Language = 507 // zzb X_PIG_LATIN Language = 508 // zzp X_HACKER Language = 509 // zzh X_KLINGON Language = 510 // tlh X_ELMER_FUDD Language = 511 // zze X_Common Language = 512 // xx-Zyyy X_Latin Language = 513 // xx-Latn X_Greek Language = 514 // xx-Grek X_Cyrillic Language = 515 // xx-Cyrl X_Armenian Language = 516 // xx-Armn X_Hebrew Language = 517 // xx-Hebr X_Arabic Language = 518 // xx-Arab X_Syriac Language = 519 // xx-Syrc X_Thaana Language = 520 // xx-Thaa X_Devanagari Language = 521 // xx-Deva X_Bengali Language = 522 // xx-Beng X_Gurmukhi Language = 523 // xx-Guru X_Gujarati Language = 524 // xx-Gujr X_Oriya Language = 525 // xx-Orya X_Tamil Language = 526 // xx-Taml X_Telugu Language = 527 // xx-Telu X_Kannada Language = 528 // xx-Knda X_Malayalam Language = 529 // xx-Mlym X_Sinhala Language = 530 // xx-Sinh X_Thai Language = 531 // xx-Thai X_Lao Language = 532 // xx-Laoo X_Tibetan Language = 533 // 
xx-Tibt X_Myanmar Language = 534 // xx-Mymr X_Georgian Language = 535 // xx-Geor X_Hangul Language = 536 // xx-Hang X_Ethiopic Language = 537 // xx-Ethi X_Cherokee Language = 538 // xx-Cher X_Canadian_Aboriginal Language = 539 // xx-Cans X_Ogham Language = 540 // xx-Ogam X_Runic Language = 541 // xx-Runr X_Khmer Language = 542 // xx-Khmr X_Mongolian Language = 543 // xx-Mong X_Hiragana Language = 544 // xx-Hira X_Katakana Language = 545 // xx-Kana X_Bopomofo Language = 546 // xx-Bopo X_Han Language = 547 // xx-Hani X_Yi Language = 548 // xx-Yiii X_Old_Italic Language = 549 // xx-Ital X_Gothic Language = 550 // xx-Goth X_Deseret Language = 551 // xx-Dsrt X_Inherited Language = 552 // xx-Qaai X_Tagalog Language = 553 // xx-Tglg X_Hanunoo Language = 554 // xx-Hano X_Buhid Language = 555 // xx-Buhd X_Tagbanwa Language = 556 // xx-Tagb X_Limbu Language = 557 // xx-Limb X_Tai_Le Language = 558 // xx-Tale X_Linear_B Language = 559 // xx-Linb X_Ugaritic Language = 560 // xx-Ugar X_Shavian Language = 561 // xx-Shaw X_Osmanya Language = 562 // xx-Osma X_Cypriot Language = 563 // xx-Cprt X_Braille Language = 564 // xx-Brai X_Buginese Language = 565 // xx-Bugi X_Coptic Language = 566 // xx-Copt X_New_Tai_Lue Language = 567 // xx-Talu X_Glagolitic Language = 568 // xx-Glag X_Tifinagh Language = 569 // xx-Tfng X_Syloti_Nagri Language = 570 // xx-Sylo X_Old_Persian Language = 571 // xx-Xpeo X_Kharoshthi Language = 572 // xx-Khar X_Balinese Language = 573 // xx-Bali X_Cuneiform Language = 574 // xx-Xsux X_Phoenician Language = 575 // xx-Phnx X_Phags_Pa Language = 576 // xx-Phag X_Nko Language = 577 // xx-Nkoo X_Sundanese Language = 578 // xx-Sund X_Lepcha Language = 579 // xx-Lepc X_Ol_Chiki Language = 580 // xx-Olck X_Vai Language = 581 // xx-Vaii X_Saurashtra Language = 582 // xx-Saur X_Kayah_Li Language = 583 // xx-Kali X_Rejang Language = 584 // xx-Rjng X_Lycian Language = 585 // xx-Lyci X_Carian Language = 586 // xx-Cari X_Lydian Language = 587 // xx-Lydi X_Cham Language = 
588 // xx-Cham X_Tai_Tham Language = 589 // xx-Lana X_Tai_Viet Language = 590 // xx-Tavt X_Avestan Language = 591 // xx-Avst X_Egyptian_Hieroglyphs Language = 592 // xx-Egyp X_Samaritan Language = 593 // xx-Samr X_Lisu Language = 594 // xx-Lisu X_Bamum Language = 595 // xx-Bamu X_Javanese Language = 596 // xx-Java X_Meetei_Mayek Language = 597 // xx-Mtei X_Imperial_Aramaic Language = 598 // xx-Armi X_Old_South_Arabian Language = 599 // xx-Sarb X_Inscriptional_Parthian Language = 600 // xx-Prti X_Inscriptional_Pahlavi Language = 601 // xx-Phli X_Old_Turkic Language = 602 // xx-Orkh X_Kaithi Language = 603 // xx-Kthi X_Batak Language = 604 // xx-Batk X_Brahmi Language = 605 // xx-Brah X_Mandaic Language = 606 // xx-Mand X_Chakma Language = 607 // xx-Cakm X_Meroitic_Cursive Language = 608 // xx-Merc X_Meroitic_Hieroglyphs Language = 609 // xx-Mero X_Miao Language = 610 // xx-Plrd X_Sharada Language = 611 // xx-Shrd X_Sora_Sompeng Language = 612 // xx-Sora X_Takri Language = 613 // xx-Takr NUM_LANGUAGES Language = 614 )
Copied from "generated_language.h"
func DetectLang ¶
DetectLang returns the language code for the detected language in the given text. ENGLISH is returned if the language cannot be detected.
func LanguageFromCode ¶
LanguageFromCode returns the language associated with the code. Returns UNKNOWN_LANGUAGE if the code isn't known.
func NewLanguage ¶
NewLanguage supplies a safe way of converting a uint16 to a Language. If an invalid id is supplied, UNKNOWN_LANGUAGE is returned.
type Languages ¶
// Languages holds the probable languages of the supplied text.
type Languages struct {
Estimates []Estimate // Possible languages, returned in order of confidence.
TextBytes int // The amount of non-tag/letters-only text found.
Reliable bool // Whether CLD2 sees the result as reliable.
}
Languages are the probable languages of the supplied text.
func DetectThree ¶
DetectThree returns up to three language guesses. Extended languages are enabled. Unknown languages are removed from the result set.