data

package
v2.9.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 6, 2025 License: Apache-2.0 Imports: 4 Imported by: 18

Documentation

Overview

Package data contains only auto-generated data-structures for all the language identification strategies from the Linguist project sources.

Index

Constants

This section is empty.

Variables

View Source
var ContentHeuristics = map[string]*Heuristics{}/* 137 elements not displayed */
View Source
// DocumentationMatchers holds the path patterns for documentation files.
// The patterns cover top-level doc(s)/, man/, examples/, demo(s)/ and
// sample(s)/ directories; documentation/, groovydoc/, javadoc/ and inst/doc/
// directories at any depth; and well-known files such as CITATION, CHANGELOG,
// CONTRIBUTING, COPYING, INSTALL, LICENSE and README at any depth.
var DocumentationMatchers = []regex.EnryRegexp{
	regex.MustCompile(`^[Dd]ocs?/`),
	regex.MustCompile(`(^|/)[Dd]ocumentation/`),
	regex.MustCompile(`(^|/)[Gg]roovydoc/`),
	regex.MustCompile(`(^|/)[Jj]avadoc/`),
	regex.MustCompile(`^[Mm]an/`),
	regex.MustCompile(`^[Ee]xamples/`),
	regex.MustCompile(`^[Dd]emos?/`),
	regex.MustCompile(`(^|/)inst/doc/`),
	regex.MustCompile(`(^|/)CITATION(\.cff|(S)?(\.(bib|md))?)$`),
	regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`),
	regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`),
	regex.MustCompile(`(^|/)COPYING(\.|$)`),
	regex.MustCompile(`(^|/)INSTALL(\.|$)`),
	regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`),
	regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`),
	regex.MustCompile(`(^|/)README(\.|$)`),
	regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`),
	regex.MustCompile(`^[Ss]amples?/`),
}
View Source
var ExtensionsByLanguage = map[string][]string{}/* 705 elements not displayed */
View Source
// FastVendorMatcher is equivalent to matching any of the VendorMatchers.
// The pattern is one alternation with three parts: alternatives anchored at
// the start of the path, alternatives anchored at the start of any path
// segment ("^" or "/"), and unanchored alternatives.
// The raw string must stay on a single line: a line break inside the
// backquoted literal would become part of the pattern.
var FastVendorMatcher = regex.MustCompile(`(?:^(?:(?:[Dd]ependencies/)|(?:debian/)|(?:deps/)|(?:rebar$)))|(?:(?:^|/)(?:(?:BuddyBuildSDK\.framework/)|(?:Carthage/)|(?:Chart\.js$)|(?:Control\.FullScreen\.css)|(?:Control\.FullScreen\.js)|(?:Crashlytics\.framework/)|(?:Fabric\.framework/)|(?:Godeps/_workspace/)|(?:Jenkinsfile$)|(?:Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$)|(?:MathJax/)|(?:MochiKit\.js$)|(?:RealmSwift\.framework)|(?:Realm\.framework)|(?:Sparkle/)|(?:Vagrantfile$)|(?:[Bb]ourbon/.*\.(css|less|scss|styl)$)|(?:[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo))|(?:[Ee]xtern(als?)?/)|(?:[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$)|(?:[Pp]ackages\/.+\.\d+\/)|(?:[Ss]pecs?/fixtures/)|(?:[Tt]ests?/fixtures/)|(?:[Vv]+endor/)|(?:\.[Dd][Ss]_[Ss]tore$)|(?:\.gitattributes$)|(?:\.github/)|(?:\.gitignore$)|(?:\.gitmodules$)|(?:\.gitpod\.Dockerfile$)|(?:\.google_apis/)|(?:\.indent\.pro)|(?:\.mvn/wrapper/)|(?:\.obsidian/)|(?:\.osx$)|(?:\.sublime-project)|(?:\.sublime-workspace)|(?:\.teamcity/)|(?:\.vscode/)|(?:\.yarn/plugins/)|(?:\.yarn/releases/)|(?:\.yarn/sdks/)|(?:\.yarn/unplugged/)|(?:\.yarn/versions/)|(?:_esy$)|(?:ace-builds/)|(?:aclocal\.m4)|(?:activator$)|(?:activator\.bat$)|(?:admin_media/)|(?:angular([^.]*)\.js$)|(?:animate\.(css|less|scss|styl)$)|(?:bootbox\.js)|(?:bootstrap([^/.]*)(\..*)?\.(js|css|less|scss|styl)$)|(?:bootstrap-datepicker/)|(?:bower_components/)|(?:bulma\.(css|sass|scss)$)|(?:cache/)|(?:ckeditor\.js$)|(?:config\.guess$)|(?:config\.sub$)|(?:configure$)|(?:controls\.js$)|(?:cordova([^.]*)\.js$)|(?:cordova\-\d\.\d(\.\d)?\.js$)|(?:cpplint\.py)|(?:custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$)|(?:dist/)|(?:docs?/_?(build|themes?|templates?|static)/)|(?:dojo\.js$)|(?:dotnet-install\.(ps1|sh)$)|(?:dragdrop\.js$)|(?:effects\.js$)|(?:env/)|(?:erlang\.mk)|(?:extjs/.*?\.html$)|(?:extjs/.*?\.js$)|(?:extjs/.*?\.properties$)|(?:extjs/.*?\.txt$)|(?:extjs/.*?\.xml$)|(?:extjs/\.sencha/)|(?:extjs/builds/)|(?:extjs/cmd/)|(?:extjs/docs/)|(?:extjs/examples/)|(?:extjs/locale/)|(?:extjs/packages/)|(?:extjs/plugins/)|(?:extjs/resources/)|(?:extjs/src/)|(?:extjs/welcome/)|(?:fabfile\.py$)|(?:flow-typed/.*\.js$)|(?:font-?awesome/.*\.(css|less|scss|styl)$)|(?:font-?awesome\.(css|less|scss|styl)$)|(?:fontello(.*?)\.css$)|(?:foundation(\..*)?\.js$)|(?:foundation\.(css|less|scss|styl)$)|(?:fuelux\.js)|(?:gradle/wrapper/)|(?:gradlew$)|(?:gradlew\.bat$)|(?:html5shiv\.js$)|(?:inst/extdata/)|(?:jquery([^.]*)\.js$)|(?:jquery([^.]*)\.unobtrusive\-ajax\.js$)|(?:jquery([^.]*)\.validate(\.unobtrusive)?\.js$)|(?:jquery\-\d\.\d+(\.\d+)?\.js$)|(?:jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$)|(?:jquery\.(ui|effects)\.([^.]*)\.(js|css)$)|(?:jquery\.dataTables\.js)|(?:jquery\.fancybox\.(js|css))|(?:jquery\.fileupload(-\w+)?\.js$)|(?:jquery\.fn\.gantt\.js)|(?:knockout-(\d+\.){3}(debug\.)?js$)|(?:leaflet\.draw-src\.js)|(?:leaflet\.draw\.css)|(?:leaflet\.spin\.js)|(?:libtool\.m4)|(?:ltoptions\.m4)|(?:ltsugar\.m4)|(?:ltversion\.m4)|(?:lt~obsolete\.m4)|(?:materialize\.(css|less|scss|styl|js)$)|(?:modernizr\-\d\.\d+(\.\d+)?\.js$)|(?:modernizr\.custom\.\d+\.js$)|(?:mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$)|(?:mvnw$)|(?:mvnw\.cmd$)|(?:node_modules/)|(?:normalize\.(css|less|scss|styl)$)|(?:octicons\.css)|(?:pdf\.worker\.js)|(?:proguard-rules\.pro$)|(?:proguard\.pro$)|(?:prototype(.*)\.js$)|(?:puphpet/)|(?:react(-[^.]*)?\.js$)|(?:run\.n$)|(?:select2/.*\.(css|scss|js)$)|(?:shBrush([^.]*)\.js$)|(?:shCore\.js$)|(?:shLegacy\.js$)|(?:skeleton\.(css|less|scss|styl)$)|(?:slick\.\w+.js$)|(?:sprockets-octicons\.scss)|(?:testdata/)|(?:tiny_mce([^.]*)\.js$)|(?:tiny_mce/(langs|plugins|themes|utils))|(?:vendors?/)|(?:waf$)|(?:wicket-leaflet\.js)|(?:yahoo-([^.]*)\.js$)|(?:yui([^.]*)\.js$)))|(?:(.*?)\.d\.ts$)|(?:(3rd|[Tt]hird)[-_]?[Pp]arty/)|(?:([^\s]*)import\.(css|less|scss|styl)$)|(?:(\.|-)min\.(js|css)$)|(?:(^|\/)d3(\.v\d+)?([^.]*)\.js$)|(?:-vsdoc\.js$)|(?:\.imageset/)|(?:\.intellisense\.js$)|(?:\.xctemplate/)`)

FastVendorMatcher is equivalent to matching any of the VendorMatchers.

View Source
// GeneratedCodeExtensions is the set of file extensions that always belong
// to generated files. Only membership matters, so every value is the empty
// struct.
var GeneratedCodeExtensions = map[string]struct{}{
	".nib":             {},
	".xcuserstate":     {},
	".xcworkspacedata": {},
}

GeneratedCodeExtensions contains all extensions that belong to generated files for sure.

View Source
// GeneratedCodeMatchers is the list of all generated code matchers that rely
// on checking the content of the file to make the guess. Every entry is a
// GeneratedCodeMatcher; the functions themselves are defined elsewhere in
// the package.
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
	isMinifiedFile,
	hasSourceMapReference,
	isSourceMap,
	isCompiledCoffeeScript,
	isGeneratedNetDocfile,
	isGeneratedJavaScriptPEGParser,
	isGeneratedPostScript,
	isGeneratedGo,
	isGeneratedProtobufFromGo,
	isGeneratedProtobuf,
	isGeneratedJavaScriptProtocolBuffer,
	isGeneratedApacheThrift,
	isGeneratedJNIHeader,
	isVCRCassette,
	isCompiledCythonFile,
	isGeneratedModule,
	isGeneratedUnity3DMeta,
	isGeneratedRacc,
	isGeneratedJFlex,
	isGeneratedGrammarKit,
	isGeneratedRoxygen2,
	isGeneratedJison,
	isGeneratedGRPCCpp,
	isGeneratedDart,
	isGeneratedPerlPPPortHeader,
	isGeneratedGameMakerStudio,
	isGeneratedGimp,
	isGeneratedVisualStudio6,
	isGeneratedHaxe,
	isGeneratedHTML,
	isGeneratedJooq,
}

GeneratedCodeMatchers is the list of all generated code matchers that rely on checking the content of the file to make the guess.

View Source
// GeneratedCodeNameMatchers are all the matchers that check whether the code
// is generated based only on the file name.
//
// NOTE(review): nameMatches, nameContains and nameEndsWith are defined
// elsewhere; judging by their names they build a matcher from a regular
// expression, a substring and a suffix respectively — confirm against their
// definitions.
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{

	// Paths containing a .idea/ directory.
	nameMatches(`(?:^|\/)\.idea\/`),

	// Paths containing a Pods/ directory.
	nameMatches(`(^Pods|\/Pods)\/`),

	// Paths containing a Carthage/Build/ directory.
	nameMatches(`(^|\/)Carthage\/Build\/`),

	// Names ending in .designer.cs or .designer.vb, case-insensitively.
	nameMatches(`(?i)\.designer\.(cs|vb)$`),

	// Names ending in .feature.cs, case-insensitively.
	nameMatches(`(?i)\.feature\.cs$`),

	nameContains("node_modules/"),

	// vendor/ followed by a domain-like name such as vendor/github.com.
	nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),

	nameEndsWith("Gopkg.lock"),
	nameEndsWith("glide.lock"),

	nameEndsWith("poetry.lock"),

	nameEndsWith("pdm.lock"),

	nameEndsWith("uv.lock"),

	// esy.lock, optionally prefixed with "<word>.".
	// NOTE(review): the "." in "esy.lock" is unescaped, so it matches any
	// character; left as is because these tables are generated from Linguist.
	nameMatches(`(^|\/)(\w+\.)?esy.lock$`),

	nameEndsWith("deno.lock"),

	nameEndsWith("npm-shrinkwrap.json"),

	nameEndsWith("package-lock.json"),

	nameEndsWith("pnpm-lock.yaml"),

	// Any file whose base name starts with ".pnp.".
	nameMatches(`(^|\/)\.pnp\..*$`),

	nameContains("Godeps/"),

	nameEndsWith("composer.lock"),

	// Names ending in .zep.c, .zep.h or .zep.php; the leading "." requires
	// at least one character before ".zep".
	nameMatches(`.\.zep\.(?:c|h|php)$`),

	nameEndsWith("Cargo.lock"),

	nameEndsWith("Cargo.toml.orig"),

	nameMatches(`(^|\/)flake\.lock$`),

	nameMatches(`(^|\/)MODULE\.bazel\.lock$`),

	nameEndsWith("Pipfile.lock"),

	nameMatches(`(?:^|\/)\.terraform\.lock\.hcl$`),

	nameContains("__generated__/"),

	// Names ending in _tlb.pas, case-insensitively.
	nameMatches(`(?i)_tlb\.pas$`),

	// Paths containing an htmlcov/ directory.
	nameMatches(`(?:^|\/)htmlcov\/`),

	// query-*.json files inside a .sqlx/ directory.
	nameMatches(`(?:^|.*\/)\.sqlx\/query-.+\.json$`),
}

GeneratedCodeNameMatchers are all the matchers that check whether the code is generated based only on the file name.

View Source
var IDByLanguage = map[string]int{}/* 748 elements not displayed */
View Source
var LanguageByAliasMap = map[string]string{}/* 1137 elements not displayed */

LanguageByAliasMap keeps the aliases for the different languages and also uses the name of each language as an alias. All the keys (alias or not) are written in lower case, and whitespace has been replaced by underscores.

View Source
var LanguageInfoByID = map[int]LanguageInfo{}/* 748 elements not displayed */

LanguageInfoByID allows accessing LanguageInfo by a language's ID.

View Source
var LanguagesByExtension = map[string][]string{}/* 1381 elements not displayed */
View Source
var LanguagesByFilename = map[string][]string{}/* 379 elements not displayed */
View Source
var LanguagesByInterpreter = map[string][]string{}/* 153 elements not displayed */
View Source
var LanguagesColor = map[string]string{}/* 611 elements not displayed */
View Source
// LanguagesGroup maps a language name to the name of its group, i.e. its
// parent language. A few languages (for example Cairo, Fortran and LigoLANG)
// are listed as their own group.
var LanguagesGroup = map[string]string{
	"Alpine Abuild":                  "Shell",
	"Apollo Guidance Computer":       "Assembly",
	"BibTeX":                         "TeX",
	"Bison":                          "Yacc",
	"Bluespec BH":                    "Bluespec",
	"C2hs Haskell":                   "Haskell",
	"Cairo":                          "Cairo",
	"Cairo Zero":                     "Cairo",
	"CameLIGO":                       "LigoLANG",
	"ColdFusion CFC":                 "ColdFusion",
	"Cylc":                           "INI",
	"ECLiPSe":                        "Prolog",
	"Easybuild":                      "Python",
	"Ecere Projects":                 "JavaScript",
	"Ecmarkup":                       "HTML",
	"EditorConfig":                   "INI",
	"Elvish Transcript":              "Elvish",
	"Filterscript":                   "RenderScript",
	"Fortran":                        "Fortran",
	"Fortran Free Form":              "Fortran",
	"Gentoo Ebuild":                  "Shell",
	"Gentoo Eclass":                  "Shell",
	"Git Config":                     "INI",
	"Glimmer JS":                     "JavaScript",
	"Glimmer TS":                     "TypeScript",
	"Gradle Kotlin DSL":              "Gradle",
	"Groovy Server Pages":            "Groovy",
	"HTML+ECR":                       "HTML",
	"HTML+EEX":                       "HTML",
	"HTML+ERB":                       "HTML",
	"HTML+PHP":                       "HTML",
	"HTML+Razor":                     "HTML",
	"Isabelle ROOT":                  "Isabelle",
	"JFlex":                          "Lex",
	"JSON with Comments":             "JSON",
	"Java Server Pages":              "Java",
	"Java Template Engine":           "Java",
	"JavaScript+ERB":                 "JavaScript",
	"Jison":                          "Yacc",
	"Jison Lex":                      "Lex",
	"Julia REPL":                     "Julia",
	"Lean 4":                         "Lean",
	"LigoLANG":                       "LigoLANG",
	"Literate Agda":                  "Agda",
	"Literate CoffeeScript":          "CoffeeScript",
	"Literate Haskell":               "Haskell",
	"M4Sugar":                        "M4",
	"MUF":                            "Forth",
	"Maven POM":                      "XML",
	"Motorola 68K Assembly":          "Assembly",
	"NPM Config":                     "INI",
	"NumPy":                          "Python",
	"OASv2-json":                     "OpenAPI Specification v2",
	"OASv2-yaml":                     "OpenAPI Specification v2",
	"OASv3-json":                     "OpenAPI Specification v3",
	"OASv3-yaml":                     "OpenAPI Specification v3",
	"OpenCL":                         "C",
	"OpenRC runscript":               "Shell",
	"Parrot Assembly":                "Parrot",
	"Parrot Internal Representation": "Parrot",
	"Pic":                            "Roff",
	"PostCSS":                        "CSS",
	"Python console":                 "Python",
	"Python traceback":               "Python",
	"RBS":                            "Ruby",
	"Readline Config":                "INI",
	"ReasonLIGO":                     "LigoLANG",
	"Roff Manpage":                   "Roff",
	"SSH Config":                     "INI",
	"STON":                           "Smalltalk",
	"Simple File Verification":       "Checksums",
	"Snakemake":                      "Python",
	"TSX":                            "TypeScript",
	"Tcsh":                           "Shell",
	"Terraform Template":             "HCL",
	"Unified Parallel C":             "C",
	"Unix Assembly":                  "Assembly",
	"Wget Config":                    "INI",
	"X BitMap":                       "C",
	"X PixMap":                       "C",
	"XML Property List":              "XML",
	"cURL Config":                    "INI",
	"fish":                           "Shell",
	"nanorc":                         "INI",
}
View Source
var LanguagesLogProbabilities = map[string]float64{}/* 678 elements not displayed */
View Source
var LanguagesMime = map[string]string{}/* 281 elements not displayed */
View Source
var LanguagesType = map[string]int{}/* 748 elements not displayed */
View Source
// LinguistCommit is the linguist commit from which the files were generated.
var LinguistCommit = "5fad8d57605a914026a65b0e3ff6815d739944de"

LinguistCommit is the Linguist commit from which the files were generated.

View Source
// TestMatchers is a hand-made collection of regexps used by the function
// enry.IsTest to identify test files in different languages. Each pattern
// targets one file extension or family of extensions.
var TestMatchers = []regex.EnryRegexp{
	// *Test.php under a tests/ directory.
	regex.MustCompile(`(^|/)tests/.*Test\.php$`),
	// *Test.java or *Tests.java under a test/ directory.
	regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`),
	// Test*.java directly in, or nested below, a test/ directory.
	regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`),
	// *Test(s).scala or *Spec(s).scala under a test/ directory.
	regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`),
	// test_*.py
	regex.MustCompile(`(^|/)test_.*\.py$`),
	// *_test.go
	regex.MustCompile(`(^|/).*_test\.go$`),
	// *_test.rb or *_spec.rb
	regex.MustCompile(`(^|/).*_(test|spec)\.rb$`),
	// *Test.cs or *Tests.cs
	regex.MustCompile(`(^|/).*Test(s?)\.cs$`),
	// *.test.* or *.spec.* with a ts, tsx or js extension.
	regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`),
}

TestMatchers is a hand-made collection of regexps used by the function `enry.IsTest` to identify test files in different languages.

View Source
var TokensLogProbabilities = map[string]map[string]float64{}/* 678 elements not displayed */
View Source
// TokensTotal is a float64 total.
// NOTE(review): presumably the overall token count behind
// TokensLogProbabilities — confirm against the generator.
var TokensTotal = 2356177.0
View Source
var VendorMatchers = []regex.EnryRegexp{}/* 167 elements not displayed */

Functions

func LanguageByAlias

func LanguageByAlias(langOrAlias string) (lang string, ok bool)

LanguageByAlias looks up the language name by its alias or name. It mirrors the logic of GitHub Linguist and is needed, e.g., for heuristics.yml, which mixes names and aliases in a language field (see the XPM example).

Types

type GeneratedCodeMatcher added in v2.5.0

// GeneratedCodeMatcher checks whether the file with the given data is
// generated code. It receives the file path, its extension and its content.
type GeneratedCodeMatcher func(path, ext string, content []byte) bool

GeneratedCodeMatcher checks whether the file with the given data is generated code.

type GeneratedCodeNameMatcher added in v2.5.0

// GeneratedCodeNameMatcher is a function that tells whether the file with
// the given name is generated.
type GeneratedCodeNameMatcher func(string) bool

GeneratedCodeNameMatcher is a function that tells whether the file with the given name is generated.

type Heuristics

// Heuristics is a number of sequentially applied rule.Heuristic where a
// matching one disambiguates language(s) for a single file extension.
type Heuristics []rule.Heuristic

Heuristics is a number of sequentially applied rule.Heuristic where a matching one disambiguates language(s) for a single file extension.

func (Heuristics) Match

func (hs Heuristics) Match(data []byte) []string

Match returns languages identified by the matching rule of the heuristic.

type LanguageInfo added in v2.8.0

// LanguageInfo exposes the data for a language's Linguist YAML entry as a Go
// struct.
// See https://github.com/github/linguist/blob/master/lib/linguist/languages.yml
type LanguageInfo struct {
	// Name is the language name. May contain symbols not safe for use in some filesystems (e.g., `F*`).
	Name string
	// FSName is the filesystem safe name. Will only be set if Name is not safe for use in all filesystems.
	FSName string
	// Type is the language Type. See data.Type for values.
	Type Type
	// Color is the CSS hex color to represent the language. Only used if type is "programming" or "markup".
	Color string
	// Group is the name of the parent language. Languages in a group are counted in the statistics as the parent language.
	Group string
	// Aliases is a slice of additional aliases (implicitly includes name.downcase).
	Aliases []string
	// Extensions is a slice of associated extensions (the first one is considered the primary extension).
	Extensions []string
	// Interpreters is a slice of associated interpreters.
	Interpreters []string
	// Filenames is a slice of filenames commonly associated with the language.
	Filenames []string
	// MimeType (maps to codemirror_mime_type in Linguist's languages.yml) is the string name of the file mime type used for highlighting whenever a file is edited.
	MimeType string
	// TMScope is the TextMate scope that represents this programming language.
	TMScope string
	// AceMode is the name of the Ace Mode used for highlighting whenever a file is edited.
	AceMode string
	// CodeMirrorMode is the name of the CodeMirror Mode used for highlighting whenever a file is edited.
	CodeMirrorMode string
	// Wrap is a boolean flag to enable line wrapping in an editor.
	Wrap bool
	// LanguageID is the Linguist-assigned numeric ID for the language.
	LanguageID int
}

LanguageInfo exposes the data for a language's Linguist YAML entry as a Go struct. See https://github.com/github/linguist/blob/master/lib/linguist/languages.yml

type Type added in v2.8.0

// Type represents a language's type: data, programming, markup, prose, or
// unknown.
type Type int

Type represents a language's type: either data, programming, markup, prose, or unknown.

// Type's values. They are assigned with iota, so their order fixes their
// numeric values and TypeUnknown is the zero value of Type.
const (
	// TypeUnknown is the unknown language type (value 0).
	TypeUnknown Type = iota
	// TypeData is the "data" language type.
	TypeData
	// TypeProgramming is the "programming" language type.
	TypeProgramming
	// TypeMarkup is the "markup" language type.
	TypeMarkup
	// TypeProse is the "prose" language type.
	TypeProse
)

Type's values.

func TypeForString added in v2.8.0

func TypeForString(s string) Type

func (Type) String added in v2.8.0

func (t Type) String() string

Directories

Path Synopsis
Package rule contains rule-based heuristic implementations.
Package rule contains rule-based heuristic implementations.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL