Documentation
¶
Overview ¶
Package data contains only auto-generated data-structures for all the language identification strategies from the Linguist project sources.
Index ¶
Constants ¶
This section is empty.
Variables ¶
var ContentHeuristics = map[string]*Heuristics{}/* 137 elements not displayed */
var DocumentationMatchers = []regex.EnryRegexp{ regex.MustCompile(`^[Dd]ocs?/`), regex.MustCompile(`(^|/)[Dd]ocumentation/`), regex.MustCompile(`(^|/)[Gg]roovydoc/`), regex.MustCompile(`(^|/)[Jj]avadoc/`), regex.MustCompile(`^[Mm]an/`), regex.MustCompile(`^[Ee]xamples/`), regex.MustCompile(`^[Dd]emos?/`), regex.MustCompile(`(^|/)inst/doc/`), regex.MustCompile(`(^|/)CITATION(\.cff|(S)?(\.(bib|md))?)$`), regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`), regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`), regex.MustCompile(`(^|/)COPYING(\.|$)`), regex.MustCompile(`(^|/)INSTALL(\.|$)`), regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`), regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`), regex.MustCompile(`(^|/)README(\.|$)`), regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`), regex.MustCompile(`^[Ss]amples?/`), }
var ExtensionsByLanguage = map[string][]string{}/* 705 elements not displayed */
var FastVendorMatcher = regex.MustCompile(`(?:^(?:(?:[Dd]ependencies/)|(?:debian/)|(?:deps/)|(?:rebar$)))|(?:(?:^|/)(?:(?:BuddyBuildSDK\.framework/)|(?:Carthage/)|(?:Chart\.js$)|(?:Control\.FullScreen\.css)|(?:Control\.FullScreen\.js)|(?:Crashlytics\.framework/)|(?:Fabric\.framework/)|(?:Godeps/_workspace/)|(?:Jenkinsfile$)|(?:Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$)|(?:MathJax/)|(?:MochiKit\.js$)|(?:RealmSwift\.framework)|(?:Realm\.framework)|(?:Sparkle/)|(?:Vagrantfile$)|(?:[Bb]ourbon/.*\.(css|less|scss|styl)$)|(?:[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo))|(?:[Ee]xtern(als?)?/)|(?:[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$)|(?:[Pp]ackages\/.+\.\d+\/)|(?:[Ss]pecs?/fixtures/)|(?:[Tt]ests?/fixtures/)|(?:[Vv]+endor/)|(?:\.[Dd][Ss]_[Ss]tore$)|(?:\.gitattributes$)|(?:\.github/)|(?:\.gitignore$)|(?:\.gitmodules$)|(?:\.gitpod\.Dockerfile$)|(?:\.google_apis/)|(?:\.indent\.pro)|(?:\.mvn/wrapper/)|(?:\.obsidian/)|(?:\.osx$)|(?:\.sublime-project)|(?:\.sublime-workspace)|(?:\.teamcity/)|(?:\.vscode/)|(?:\.yarn/plugins/)|(?:\.yarn/releases/)|(?:\.yarn/sdks/)|(?:\.yarn/unplugged/)|(?:\.yarn/versions/)|(?:_esy$)|(?:ace-builds/)|(?:aclocal\.m4)|(?:activator$)|(?:activator\.bat$)|(?:admin_media/)|(?:angular([^.]*)\.js$)|(?:animate\.(css|less|scss|styl)$)|(?:bootbox\.js)|(?:bootstrap([^/.]*)(\..*)?\.(js|css|less|scss|styl)$)|(?:bootstrap-datepicker/)|(?:bower_components/)|(?:bulma\.(css|sass|scss)$)|(?:cache/)|(?:ckeditor\.js$)|(?:config\.guess$)|(?:config\.sub$)|(?:configure$)|(?:controls\.js$)|(?:cordova([^.]*)\.js$)|(?:cordova\-\d\.\d(\.\d)?\.js$)|(?:cpplint\.py)|(?:custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$)|(?:dist/)|(?:docs?/_?(build|themes?|templates?|static)/)|(?:dojo\.js$)|(?:dotnet-install\.(ps1|sh)$)|(?:dragdrop\.js$)|(?:effects\.js$)|(?:env/)|(?:erlang\.mk)|(?:extjs/.*?\.html$)|(?:extjs/.*?\.js$)|(?:extjs/.*?\.properties$)|(?:extjs/.*?\.txt$)|(?:extjs/.*?\.xml$)|(?:extjs/\.sencha/)|(?:extjs/builds/)|(?:extjs/cmd/)|(?:ext
js/docs/)|(?:extjs/examples/)|(?:extjs/locale/)|(?:extjs/packages/)|(?:extjs/plugins/)|(?:extjs/resources/)|(?:extjs/src/)|(?:extjs/welcome/)|(?:fabfile\.py$)|(?:flow-typed/.*\.js$)|(?:font-?awesome/.*\.(css|less|scss|styl)$)|(?:font-?awesome\.(css|less|scss|styl)$)|(?:fontello(.*?)\.css$)|(?:foundation(\..*)?\.js$)|(?:foundation\.(css|less|scss|styl)$)|(?:fuelux\.js)|(?:gradle/wrapper/)|(?:gradlew$)|(?:gradlew\.bat$)|(?:html5shiv\.js$)|(?:inst/extdata/)|(?:jquery([^.]*)\.js$)|(?:jquery([^.]*)\.unobtrusive\-ajax\.js$)|(?:jquery([^.]*)\.validate(\.unobtrusive)?\.js$)|(?:jquery\-\d\.\d+(\.\d+)?\.js$)|(?:jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$)|(?:jquery\.(ui|effects)\.([^.]*)\.(js|css)$)|(?:jquery\.dataTables\.js)|(?:jquery\.fancybox\.(js|css))|(?:jquery\.fileupload(-\w+)?\.js$)|(?:jquery\.fn\.gantt\.js)|(?:knockout-(\d+\.){3}(debug\.)?js$)|(?:leaflet\.draw-src\.js)|(?:leaflet\.draw\.css)|(?:leaflet\.spin\.js)|(?:libtool\.m4)|(?:ltoptions\.m4)|(?:ltsugar\.m4)|(?:ltversion\.m4)|(?:lt~obsolete\.m4)|(?:materialize\.(css|less|scss|styl|js)$)|(?:modernizr\-\d\.\d+(\.\d+)?\.js$)|(?:modernizr\.custom\.\d+\.js$)|(?:mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$)|(?:mvnw$)|(?:mvnw\.cmd$)|(?:node_modules/)|(?:normalize\.(css|less|scss|styl)$)|(?:octicons\.css)|(?:pdf\.worker\.js)|(?:proguard-rules\.pro$)|(?:proguard\.pro$)|(?:prototype(.*)\.js$)|(?:puphpet/)|(?:react(-[^.]*)?\.js$)|(?:run\.n$)|(?:select2/.*\.(css|scss|js)$)|(?:shBrush([^.]*)\.js$)|(?:shCore\.js$)|(?:shLegacy\.js$)|(?:skeleton\.(css|less|scss|styl)$)|(?:slick\.\w+.js$)|(?:sprockets-octicons\.scss)|(?:testdata/)|(?:tiny_mce([^.]*)\.js$)|(?:tiny_mce/(langs|plugins|themes|utils))|(?:vendors?/)|(?:waf$)|(?:wicket-leaflet\.js)|(?:yahoo-([^.]*)\.js$)|(?:yui([^.]*)\.js$)))|(?:(.*?)\.d\.ts$)|(?:(3rd|[Tt]hird)[-_]?[Pp]arty/)|(?:([^\s]*)import\.(css|less|scss|styl)$)|(?:(\.|-)min\.(js|css)$)|(?:(^|\/)d3(\.v\d+)?([^.]*)\.js$)|(?:-vsdoc\.js$)|(?:\.imageset/)|(?:\.intellisense\.js$)|(?:\.xctemplate/)`)
FastVendorMatcher is equivalent to matching any of the VendorMatchers.
var GeneratedCodeExtensions = map[string]struct{}{
".nib": {},
".xcworkspacedata": {},
".xcuserstate": {},
}
GeneratedCodeExtensions contains the extensions that always belong to generated files.
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
isMinifiedFile,
hasSourceMapReference,
isSourceMap,
isCompiledCoffeeScript,
isGeneratedNetDocfile,
isGeneratedJavaScriptPEGParser,
isGeneratedPostScript,
isGeneratedGo,
isGeneratedProtobufFromGo,
isGeneratedProtobuf,
isGeneratedJavaScriptProtocolBuffer,
isGeneratedApacheThrift,
isGeneratedJNIHeader,
isVCRCassette,
isCompiledCythonFile,
isGeneratedModule,
isGeneratedUnity3DMeta,
isGeneratedRacc,
isGeneratedJFlex,
isGeneratedGrammarKit,
isGeneratedRoxygen2,
isGeneratedJison,
isGeneratedGRPCCpp,
isGeneratedDart,
isGeneratedPerlPPPortHeader,
isGeneratedGameMakerStudio,
isGeneratedGimp,
isGeneratedVisualStudio6,
isGeneratedHaxe,
isGeneratedHTML,
isGeneratedJooq,
}
GeneratedCodeMatchers is the list of all generated code matchers that rely on checking the content of the file to make the guess.
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
nameMatches(`(?:^|\/)\.idea\/`),
nameMatches(`(^Pods|\/Pods)\/`),
nameMatches(`(^|\/)Carthage\/Build\/`),
nameMatches(`(?i)\.designer\.(cs|vb)$`),
nameMatches(`(?i)\.feature\.cs$`),
nameContains("node_modules/"),
nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),
nameEndsWith("Gopkg.lock"),
nameEndsWith("glide.lock"),
nameEndsWith("poetry.lock"),
nameEndsWith("pdm.lock"),
nameEndsWith("uv.lock"),
nameMatches(`(^|\/)(\w+\.)?esy.lock$`),
nameEndsWith("deno.lock"),
nameEndsWith("npm-shrinkwrap.json"),
nameEndsWith("package-lock.json"),
nameEndsWith("pnpm-lock.yaml"),
nameMatches(`(^|\/)\.pnp\..*$`),
nameContains("Godeps/"),
nameEndsWith("composer.lock"),
nameMatches(`.\.zep\.(?:c|h|php)$`),
nameEndsWith("Cargo.lock"),
nameEndsWith("Cargo.toml.orig"),
nameMatches(`(^|\/)flake\.lock$`),
nameMatches(`(^|\/)MODULE\.bazel\.lock$`),
nameEndsWith("Pipfile.lock"),
nameMatches(`(?:^|\/)\.terraform\.lock\.hcl$`),
nameContains("__generated__/"),
nameMatches(`(?i)_tlb\.pas$`),
nameMatches(`(?:^|\/)htmlcov\/`),
nameMatches(`(?:^|.*\/)\.sqlx\/query-.+\.json$`),
}
GeneratedCodeNameMatchers are all the matchers that check whether the code is generated based only on the file name.
var IDByLanguage = map[string]int{}/* 748 elements not displayed */
var LanguageByAliasMap = map[string]string{}/* 1137 elements not displayed */
LanguageByAliasMap maps aliases to language names; the name of each language is also used as an alias. All the keys (alias or not) are written in lower case, and whitespace has been replaced by underscores.
var LanguageInfoByID = map[int]LanguageInfo{}/* 748 elements not displayed */
LanguageInfoByID allows accessing LanguageInfo by a language's ID.
var LanguagesByExtension = map[string][]string{}/* 1381 elements not displayed */
var LanguagesByFilename = map[string][]string{}/* 379 elements not displayed */
var LanguagesByInterpreter = map[string][]string{}/* 153 elements not displayed */
var LanguagesColor = map[string]string{}/* 611 elements not displayed */
var LanguagesGroup = map[string]string{
"Alpine Abuild": "Shell",
"Apollo Guidance Computer": "Assembly",
"BibTeX": "TeX",
"Bison": "Yacc",
"Bluespec BH": "Bluespec",
"C2hs Haskell": "Haskell",
"Cairo": "Cairo",
"Cairo Zero": "Cairo",
"CameLIGO": "LigoLANG",
"ColdFusion CFC": "ColdFusion",
"Cylc": "INI",
"ECLiPSe": "Prolog",
"Easybuild": "Python",
"Ecere Projects": "JavaScript",
"Ecmarkup": "HTML",
"EditorConfig": "INI",
"Elvish Transcript": "Elvish",
"Filterscript": "RenderScript",
"Fortran": "Fortran",
"Fortran Free Form": "Fortran",
"Gentoo Ebuild": "Shell",
"Gentoo Eclass": "Shell",
"Git Config": "INI",
"Glimmer JS": "JavaScript",
"Glimmer TS": "TypeScript",
"Gradle Kotlin DSL": "Gradle",
"Groovy Server Pages": "Groovy",
"HTML+ECR": "HTML",
"HTML+EEX": "HTML",
"HTML+ERB": "HTML",
"HTML+PHP": "HTML",
"HTML+Razor": "HTML",
"Isabelle ROOT": "Isabelle",
"JFlex": "Lex",
"JSON with Comments": "JSON",
"Java Server Pages": "Java",
"Java Template Engine": "Java",
"JavaScript+ERB": "JavaScript",
"Jison": "Yacc",
"Jison Lex": "Lex",
"Julia REPL": "Julia",
"Lean 4": "Lean",
"LigoLANG": "LigoLANG",
"Literate Agda": "Agda",
"Literate CoffeeScript": "CoffeeScript",
"Literate Haskell": "Haskell",
"M4Sugar": "M4",
"MUF": "Forth",
"Maven POM": "XML",
"Motorola 68K Assembly": "Assembly",
"NPM Config": "INI",
"NumPy": "Python",
"OASv2-json": "OpenAPI Specification v2",
"OASv2-yaml": "OpenAPI Specification v2",
"OASv3-json": "OpenAPI Specification v3",
"OASv3-yaml": "OpenAPI Specification v3",
"OpenCL": "C",
"OpenRC runscript": "Shell",
"Parrot Assembly": "Parrot",
"Parrot Internal Representation": "Parrot",
"Pic": "Roff",
"PostCSS": "CSS",
"Python console": "Python",
"Python traceback": "Python",
"RBS": "Ruby",
"Readline Config": "INI",
"ReasonLIGO": "LigoLANG",
"Roff Manpage": "Roff",
"SSH Config": "INI",
"STON": "Smalltalk",
"Simple File Verification": "Checksums",
"Snakemake": "Python",
"TSX": "TypeScript",
"Tcsh": "Shell",
"Terraform Template": "HCL",
"Unified Parallel C": "C",
"Unix Assembly": "Assembly",
"Wget Config": "INI",
"X BitMap": "C",
"X PixMap": "C",
"XML Property List": "XML",
"cURL Config": "INI",
"fish": "Shell",
"nanorc": "INI",
}
var LanguagesLogProbabilities = map[string]float64{}/* 678 elements not displayed */
var LanguagesMime = map[string]string{}/* 281 elements not displayed */
var LanguagesType = map[string]int{}/* 748 elements not displayed */
var LinguistCommit = "5fad8d57605a914026a65b0e3ff6815d739944de"
LinguistCommit is the Linguist commit from which the files were generated.
var TestMatchers = []regex.EnryRegexp{ regex.MustCompile(`(^|/)tests/.*Test\.php$`), regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`), regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`), regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`), regex.MustCompile(`(^|/)test_.*\.py$`), regex.MustCompile(`(^|/).*_test\.go$`), regex.MustCompile(`(^|/).*_(test|spec)\.rb$`), regex.MustCompile(`(^|/).*Test(s?)\.cs$`), regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`), }
TestMatchers is a hand-made collection of regexps used by the function `enry.IsTest` to identify test files in different languages.
var TokensLogProbabilities = map[string]map[string]float64{}/* 678 elements not displayed */
var TokensTotal = 2356177.000000
var VendorMatchers = []regex.EnryRegexp{}/* 167 elements not displayed */
Functions ¶
func LanguageByAlias ¶
LanguageByAlias looks up the language name by its alias or name. It mirrors the logic of GitHub Linguist and is needed, e.g., for heuristics.yml, which mixes names and aliases in a language field (see XPM example).
Types ¶
type GeneratedCodeMatcher ¶ added in v2.5.0
GeneratedCodeMatcher checks whether the file with the given data is generated code.
type GeneratedCodeNameMatcher ¶ added in v2.5.0
GeneratedCodeNameMatcher is a function that tells whether the file with the given name is generated.
type Heuristics ¶
Heuristics is a list of sequentially applied rule.Heuristic values, where a matching one disambiguates the language(s) for a single file extension.
func (Heuristics) Match ¶
func (hs Heuristics) Match(data []byte) []string
Match returns languages identified by the matching rule of the heuristic.
type LanguageInfo ¶ added in v2.8.0
type LanguageInfo struct {
	// Name is the language name. May contain symbols not safe for use in some filesystems (e.g., `F*`).
	Name string
	// FSName is the filesystem safe name. Will only be set if Name is not safe for use in all filesystems.
	FSName string
	// Type is the language Type. See data.Type for values.
	Type Type
	// Color is the CSS hex color to represent the language. Only used if type is "programming" or "markup".
	Color string
	// Group is the name of the parent language. Languages in a group are counted in the statistics as the parent language.
	Group string
	// Aliases is a slice of additional aliases (implicitly includes name.downcase)
	Aliases []string
	// Extensions is a slice of associated extensions (the first one is considered the primary extension).
	Extensions []string
	// A slice of associated interpreters
	Interpreters []string
	// Filenames is a slice of filenames commonly associated with the language.
	Filenames []string
	// MimeType (maps to codemirror_mime_type in linguist.yaml) is the string name of the file mime type used for highlighting whenever a file is edited.
	MimeType string
	// TMScope is the TextMate scope that represents this programming language.
	TMScope string
	// AceMode is the name of the Ace Mode used for highlighting whenever a file is edited.
	AceMode string
	// CodeMirrorMode is the name of the CodeMirror Mode used for highlighting whenever a file is edited.
	CodeMirrorMode string
	// Wrap is a boolean flag to enable line wrapping in an editor.
	Wrap bool
	// LanguageID is the Linguist-assigned numeric ID for the language.
	LanguageID int
}
LanguageInfo exposes the data for a language's Linguist YAML entry as a Go struct. See https://github.com/github/linguist/blob/master/lib/linguist/languages.yml
type Type ¶ added in v2.8.0
type Type int
Type represents a language's type: either data, programming, markup, prose, or unknown.