data

package
v2.0.0-...-6ef3e87 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 15, 2023 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Overview

Package data contains only auto-generated data-structures for all the language identification strategies from the Linguist project sources.

Index

Constants

This section is empty.

Variables

View Source
var ContentHeuristics = map[string]*Heuristics{}/* 112 elements not displayed */
View Source
// DocumentationMatchers lists the compiled path patterns for documentation-like
// files and directories: docs/, documentation/, groovydoc/, javadoc/, man/,
// examples/, demos/, samples/ and inst/doc/ directories, plus CITATION,
// CHANGE(S|LOG), CONTRIBUTING, COPYING, INSTALL, LICENSE/LICENCE and README
// files.
//
// Patterns that begin with a bare `^` only match at the start of the path;
// patterns that begin with `(^|/)` also match after any path separator.
var DocumentationMatchers = []regex.EnryRegexp{
	regex.MustCompile(`^[Dd]ocs?/`),
	regex.MustCompile(`(^|/)[Dd]ocumentation/`),
	regex.MustCompile(`(^|/)[Gg]roovydoc/`),
	regex.MustCompile(`(^|/)[Jj]avadoc/`),
	regex.MustCompile(`^[Mm]an/`),
	regex.MustCompile(`^[Ee]xamples/`),
	regex.MustCompile(`^[Dd]emos?/`),
	regex.MustCompile(`(^|/)inst/doc/`),
	regex.MustCompile(`(^|/)CITATION(\.cff|(S)?(\.(bib|md))?)$`),
	regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`),
	regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`),
	regex.MustCompile(`(^|/)COPYING(\.|$)`),
	regex.MustCompile(`(^|/)INSTALL(\.|$)`),
	regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`),
	regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`),
	regex.MustCompile(`(^|/)README(\.|$)`),
	regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`),
	regex.MustCompile(`^[Ss]amples?/`),
}
View Source
var ExtensionsByLanguage = map[string][]string{}/* 619 elements not displayed */
View Source
// FastVendorMatcher is a single compiled regular expression that combines the
// vendored-path patterns into one alternation, so a path can be tested with one
// match instead of one match per pattern.
//
// The expression has three top-level parts:
//   - patterns anchored at the start of the path (`^...`),
//   - patterns anchored at the start of the path or after a `/` (`(^|/)...`),
//   - unanchored patterns that may match anywhere in the path.
//
// The raw string literal must stay on one line: a line break inside a
// back-quoted Go string becomes a literal newline in the pattern, which would
// stop the surrounding alternative (e.g. `extjs/locale/`) from ever matching.
var FastVendorMatcher = regex.MustCompile(`(?:^(?:(?:[Dd]ependencies/)|(?:debian/)|(?:deps/)|(?:rebar$)))|(?:(?:^|/)(?:(?:BuddyBuildSDK\.framework/)|(?:Carthage/)|(?:Chart\.js$)|(?:Control\.FullScreen\.css)|(?:Control\.FullScreen\.js)|(?:Crashlytics\.framework/)|(?:Fabric\.framework/)|(?:Godeps/_workspace/)|(?:Jenkinsfile$)|(?:Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$)|(?:MathJax/)|(?:MochiKit\.js$)|(?:RealmSwift\.framework)|(?:Realm\.framework)|(?:Sparkle/)|(?:Vagrantfile$)|(?:[Bb]ourbon/.*\.(css|less|scss|styl)$)|(?:[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo))|(?:[Ee]xtern(als?)?/)|(?:[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$)|(?:[Pp]ackages\/.+\.\d+\/)|(?:[Ss]pecs?/fixtures/)|(?:[Tt]ests?/fixtures/)|(?:[Vv]+endor/)|(?:\.[Dd][Ss]_[Ss]tore$)|(?:\.gitattributes$)|(?:\.github/)|(?:\.gitignore$)|(?:\.gitmodules$)|(?:\.gitpod\.Dockerfile$)|(?:\.google_apis/)|(?:\.indent\.pro)|(?:\.mvn/wrapper/)|(?:\.osx$)|(?:\.sublime-project)|(?:\.sublime-workspace)|(?:\.vscode/)|(?:\.yarn/plugins/)|(?:\.yarn/releases/)|(?:\.yarn/sdks/)|(?:\.yarn/unplugged/)|(?:\.yarn/versions/)|(?:_esy$)|(?:ace-builds/)|(?:aclocal\.m4)|(?:activator$)|(?:activator\.bat$)|(?:admin_media/)|(?:angular([^.]*)\.js$)|(?:animate\.(css|less|scss|styl)$)|(?:bootbox\.js)|(?:bootstrap([^/.]*)\.(js|css|less|scss|styl)$)|(?:bootstrap-datepicker/)|(?:bower_components/)|(?:bulma\.(css|sass|scss)$)|(?:cache/)|(?:ckeditor\.js$)|(?:config\.guess$)|(?:config\.sub$)|(?:configure$)|(?:controls\.js$)|(?:cordova([^.]*)\.js$)|(?:cordova\-\d\.\d(\.\d)?\.js$)|(?:cpplint\.py)|(?:custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$)|(?:dist/)|(?:docs?/_?(build|themes?|templates?|static)/)|(?:dojo\.js$)|(?:dotnet-install\.(ps1|sh)$)|(?:dragdrop\.js$)|(?:effects\.js$)|(?:env/)|(?:erlang\.mk)|(?:extjs/.*?\.html$)|(?:extjs/.*?\.js$)|(?:extjs/.*?\.properties$)|(?:extjs/.*?\.txt$)|(?:extjs/.*?\.xml$)|(?:extjs/\.sencha/)|(?:extjs/builds/)|(?:extjs/cmd/)|(?:extjs/docs/)|(?:extjs/examples/)|(?:extjs/locale/)|(?:extjs/packages/)|(?:extjs/plugins/)|(?:extjs/resources/)|(?:extjs/src/)|(?:extjs/welcome/)|(?:fabfile\.py$)|(?:flow-typed/.*\.js$)|(?:font-?awesome/.*\.(css|less|scss|styl)$)|(?:font-?awesome\.(css|less|scss|styl)$)|(?:fontello(.*?)\.css$)|(?:foundation(\..*)?\.js$)|(?:foundation\.(css|less|scss|styl)$)|(?:fuelux\.js)|(?:gradle/wrapper/)|(?:gradlew$)|(?:gradlew\.bat$)|(?:html5shiv\.js$)|(?:inst/extdata/)|(?:jquery([^.]*)\.js$)|(?:jquery([^.]*)\.unobtrusive\-ajax\.js$)|(?:jquery([^.]*)\.validate(\.unobtrusive)?\.js$)|(?:jquery\-\d\.\d+(\.\d+)?\.js$)|(?:jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$)|(?:jquery\.(ui|effects)\.([^.]*)\.(js|css)$)|(?:jquery\.dataTables\.js)|(?:jquery\.fancybox\.(js|css))|(?:jquery\.fileupload(-\w+)?\.js$)|(?:jquery\.fn\.gantt\.js)|(?:knockout-(\d+\.){3}(debug\.)?js$)|(?:leaflet\.draw-src\.js)|(?:leaflet\.draw\.css)|(?:leaflet\.spin\.js)|(?:libtool\.m4)|(?:ltoptions\.m4)|(?:ltsugar\.m4)|(?:ltversion\.m4)|(?:lt~obsolete\.m4)|(?:materialize\.(css|less|scss|styl|js)$)|(?:modernizr\-\d\.\d+(\.\d+)?\.js$)|(?:modernizr\.custom\.\d+\.js$)|(?:mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$)|(?:mvnw$)|(?:mvnw\.cmd$)|(?:node_modules/)|(?:normalize\.(css|less|scss|styl)$)|(?:octicons\.css)|(?:pdf\.worker\.js)|(?:proguard-rules\.pro$)|(?:proguard\.pro$)|(?:prototype(.*)\.js$)|(?:puphpet/)|(?:react(-[^.]*)?\.js$)|(?:run\.n$)|(?:select2/.*\.(css|scss|js)$)|(?:shBrush([^.]*)\.js$)|(?:shCore\.js$)|(?:shLegacy\.js$)|(?:skeleton\.(css|less|scss|styl)$)|(?:slick\.\w+.js$)|(?:sprockets-octicons\.scss)|(?:testdata/)|(?:tiny_mce([^.]*)\.js$)|(?:tiny_mce/(langs|plugins|themes|utils))|(?:vendors?/)|(?:vignettes/)|(?:waf$)|(?:wicket-leaflet\.js)|(?:yahoo-([^.]*)\.js$)|(?:yui([^.]*)\.js$)))|(?:(.*?)\.d\.ts$)|(?:(3rd|[Tt]hird)[-_]?[Pp]arty/)|(?:([^\s]*)import\.(css|less|scss|styl)$)|(?:(\.|-)min\.(js|css)$)|(?:(^|\/)d3(\.v\d+)?([^.]*)\.js$)|(?:-vsdoc\.js$)|(?:\.imageset/)|(?:\.intellisense\.js$)|(?:\.xctemplate/)`)

FastVendorMatcher is equivalent to matching any of the VendorMatchers.

View Source
// GeneratedCodeExtensions is a set of file extensions, stored as a map with
// empty-struct values. Each key includes its leading dot.
var GeneratedCodeExtensions = map[string]struct{}{

	".nib":             {},
	".xcworkspacedata": {},
	".xcuserstate":     {},
}

GeneratedCodeExtensions contains all extensions that belong to generated files for sure.

View Source
// GeneratedCodeMatchers lists the content-based generated-code matchers. Every
// entry is a GeneratedCodeMatcher, i.e. a func(path, ext string, content []byte) bool.
//
// NOTE(review): the matcher implementations and the loop that evaluates this
// slice are not visible here; presumably a file counts as generated as soon as
// any entry returns true — confirm against the caller.
var GeneratedCodeMatchers = []GeneratedCodeMatcher{
	isMinifiedFile,
	hasSourceMapReference,
	isSourceMap,
	isCompiledCoffeeScript,
	isGeneratedNetDocfile,
	isGeneratedJavaScriptPEGParser,
	isGeneratedPostScript,
	isGeneratedGo,
	isGeneratedProtobufFromGo,
	isGeneratedProtobuf,
	isGeneratedJavaScriptProtocolBuffer,
	isGeneratedApacheThrift,
	isGeneratedJNIHeader,
	isVCRCassette,
	isCompiledCythonFile,
	isGeneratedModule,
	isGeneratedUnity3DMeta,
	isGeneratedRacc,
	isGeneratedJFlex,
	isGeneratedGrammarKit,
	isGeneratedRoxygen2,
	isGeneratedJison,
	isGeneratedGRPCCpp,
	isGeneratedDart,
	isGeneratedPerlPPPortHeader,
	isGeneratedGameMakerStudio,
	isGeneratedGimp,
	isGeneratedVisualStudio6,
	isGeneratedHaxe,
	isGeneratedHTML,
	isGeneratedJooq,
}

GeneratedCodeMatchers is the list of all generated code matchers that rely on checking the content of the file to make the guess.

View Source
// GeneratedCodeNameMatchers lists the name-only generated-code matchers. Every
// entry is a GeneratedCodeNameMatcher (func(string) bool) produced by one of
// the helpers nameMatches, nameEndsWith or nameContains.
//
// NOTE(review): the helper definitions are not visible here; presumably they
// test the name against a regular expression, a suffix and a substring
// respectively — confirm.
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{

	// A Pods/ directory at the start of the path or below any directory.
	nameMatches(`(^Pods|\/Pods)\/`),

	// A Carthage/Build/ directory at the start of the path or below any directory.
	nameMatches(`(^|\/)Carthage\/Build\/`),

	// Names ending in .designer.cs or .designer.vb, compared case-insensitively.
	nameMatches(`(?i)\.designer\.(cs|vb)$`),

	// Names ending in .feature.cs.
	nameEndsWith(".feature.cs"),

	// Any path containing node_modules/.
	nameContains("node_modules/"),

	// vendor/ followed by a dotted host-like name ending in one of the listed suffixes.
	nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),

	// Gopkg.lock and glide.lock files.
	nameEndsWith("Gopkg.lock"),
	nameEndsWith("glide.lock"),

	// esy.lock, optionally prefixed with "<word>.", as the last path component.
	// NOTE(review): the dot in "esy.lock" is unescaped, so it matches any
	// character there — confirm whether that is intended before tightening it.
	nameMatches(`(^|\/)(\w+\.)?esy.lock$`),

	// npm-shrinkwrap.json files.
	nameEndsWith("npm-shrinkwrap.json"),

	// package-lock.json files.
	nameEndsWith("package-lock.json"),

	// A path component that starts with ".pnp.".
	nameMatches(`(^|\/)\.pnp\..*$`),

	// Any path containing Godeps/.
	nameContains("Godeps/"),

	// composer.lock files.
	nameEndsWith("composer.lock"),

	// Names ending in .zep.c, .zep.h or .zep.php with at least one character before ".zep".
	nameMatches(`.\.zep\.(?:c|h|php)$`),

	// Cargo.lock files.
	nameEndsWith("Cargo.lock"),

	// Pipfile.lock files.
	nameEndsWith("Pipfile.lock"),

	// Any path containing __generated__/.
	nameContains("__generated__/"),

	// poetry.lock files.
	nameEndsWith("poetry.lock"),
}

GeneratedCodeNameMatchers are all the matchers that check whether the code is generated based only on the file name.

View Source
var IDByLanguage = map[string]int{}/* 653 elements not displayed */
View Source
var LanguageByAliasMap = map[string]string{}/* 977 elements not displayed */

LanguageByAliasMap maps aliases to language names; each language's own name is also included as an alias. All keys (alias or not) are written in lower case, with whitespace replaced by underscores.

View Source
var LanguageInfoByID = map[int]LanguageInfo{}/* 653 elements not displayed */

LanguageInfoByID allows accessing LanguageInfo by a language's ID.

View Source
var LanguagesByExtension = map[string][]string{}/* 1289 elements not displayed */
View Source
var LanguagesByFilename = map[string][]string{}/* 326 elements not displayed */
View Source
var LanguagesByInterpreter = map[string][]string{}/* 136 elements not displayed */
View Source
var LanguagesColor = map[string]string{}/* 519 elements not displayed */
View Source
// LanguagesGroup maps a language name to the name of its group, i.e. the
// parent language it is counted under in statistics (see LanguageInfo.Group).
// A few languages (Fortran, LigoLANG) are listed as their own group.
//
// NOTE(review): the value for "ECLiPSe" is the lower-case "prolog", unlike
// every other value in this map, which is spelled as a capitalized language
// name. This is generated data; confirm whether consumers compare group names
// case-sensitively before relying on it.
var LanguagesGroup = map[string]string{
	"Alpine Abuild":                  "Shell",
	"Apollo Guidance Computer":       "Assembly",
	"BibTeX":                         "TeX",
	"Bison":                          "Yacc",
	"C2hs Haskell":                   "Haskell",
	"CameLIGO":                       "LigoLANG",
	"ColdFusion CFC":                 "ColdFusion",
	"ECLiPSe":                        "prolog",
	"Easybuild":                      "Python",
	"Ecere Projects":                 "JavaScript",
	"EditorConfig":                   "INI",
	"Filterscript":                   "RenderScript",
	"Fortran":                        "Fortran",
	"Fortran Free Form":              "Fortran",
	"Gentoo Ebuild":                  "Shell",
	"Gentoo Eclass":                  "Shell",
	"Git Attributes":                 "INI",
	"Git Config":                     "INI",
	"Groovy Server Pages":            "Groovy",
	"HTML+ECR":                       "HTML",
	"HTML+EEX":                       "HTML",
	"HTML+ERB":                       "HTML",
	"HTML+PHP":                       "HTML",
	"HTML+Razor":                     "HTML",
	"Ignore List":                    "INI",
	"Isabelle ROOT":                  "Isabelle",
	"JFlex":                          "Lex",
	"JSON with Comments":             "JSON",
	"Java Server Pages":              "Java",
	"JavaScript+ERB":                 "JavaScript",
	"Jison":                          "Yacc",
	"Jison Lex":                      "Lex",
	"LigoLANG":                       "LigoLANG",
	"Literate Agda":                  "Agda",
	"Literate CoffeeScript":          "CoffeeScript",
	"Literate Haskell":               "Haskell",
	"M4Sugar":                        "M4",
	"MUF":                            "Forth",
	"Maven POM":                      "XML",
	"Motorola 68K Assembly":          "Assembly",
	"NPM Config":                     "INI",
	"NumPy":                          "Python",
	"OpenCL":                         "C",
	"OpenRC runscript":               "Shell",
	"Parrot Assembly":                "Parrot",
	"Parrot Internal Representation": "Parrot",
	"Pic":                            "Roff",
	"PostCSS":                        "CSS",
	"Python console":                 "Python",
	"Python traceback":               "Python",
	"Readline Config":                "INI",
	"ReasonLIGO":                     "LigoLANG",
	"Roff Manpage":                   "Roff",
	"SSH Config":                     "INI",
	"STON":                           "Smalltalk",
	"SugarSS":                        "CSS",
	"TSX":                            "TypeScript",
	"Tcsh":                           "Shell",
	"Unified Parallel C":             "C",
	"Unix Assembly":                  "Assembly",
	"Wget Config":                    "INI",
	"X BitMap":                       "C",
	"X PixMap":                       "C",
	"XML Property List":              "XML",
	"cURL Config":                    "INI",
	"fish":                           "Shell",
	"nanorc":                         "INI",
}
View Source
var LanguagesLogProbabilities = map[string]float64{}/* 586 elements not displayed */
View Source
var LanguagesMime = map[string]string{}/* 254 elements not displayed */
View Source
var LanguagesType = map[string]int{}/* 653 elements not displayed */
View Source
// LinguistCommit is the hash of the Linguist commit from which the files in
// this package were generated.
var LinguistCommit = "d7799da826e01acdb8f84694d33116dccaabe9c2"

LinguistCommit is the Linguist commit from which the files were generated.

View Source
// TestMatchers lists the compiled path patterns that identify test files by
// name. Each pattern targets one file extension family, in order: .php, .java
// (two patterns), .scala, .py, .go, .rb, .cs and .ts/.tsx/.js.
//
// Every pattern begins with `(^|/)`, so it can match at the start of the path
// or after any path separator.
var TestMatchers = []regex.EnryRegexp{
	regex.MustCompile(`(^|/)tests/.*Test\.php$`),
	regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`),
	regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`),
	regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`),
	regex.MustCompile(`(^|/)test_.*\.py$`),
	regex.MustCompile(`(^|/).*_test\.go$`),
	regex.MustCompile(`(^|/).*_(test|spec)\.rb$`),
	regex.MustCompile(`(^|/).*Test(s?)\.cs$`),
	regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`),
}

TestMatchers is a hand-made collection of regexps used by the function `enry.IsTest` to identify test files in different languages.

View Source
var TokensLogProbabilities = map[string]map[string]float64{}/* 586 elements not displayed */
View Source
// TokensTotal is a float64 total.
// NOTE(review): presumably the overall token count behind
// TokensLogProbabilities — confirm against the generator.
var TokensTotal = 2197947.000000
View Source
var VendorMatchers = []regex.EnryRegexp{}/* 166 elements not displayed */

Functions

func LanguageByAlias

func LanguageByAlias(langOrAlias string) (lang string, ok bool)

LanguageByAlias looks up the language name by its alias or name. It mirrors the logic of GitHub Linguist and is needed, e.g., for heuristics.yml, which mixes names and aliases in a language field (see XPM example).

Types

type GeneratedCodeMatcher

// GeneratedCodeMatcher reports whether the file described by its path,
// extension and content is generated code.
type GeneratedCodeMatcher func(path, ext string, content []byte) bool

GeneratedCodeMatcher checks whether the file with the given data is generated code.

type GeneratedCodeNameMatcher

// GeneratedCodeNameMatcher reports whether the file with the given name is
// generated, judging by the name alone.
type GeneratedCodeNameMatcher func(string) bool

GeneratedCodeNameMatcher is a function that tells whether the file with the given name is generated.

type Heuristics

// Heuristics is an ordered slice of rule.Heuristic values.
type Heuristics []rule.Heuristic

Heuristics is a number of sequentially applied rule.Heuristic where a matching one disambiguates language(s) for a single file extension.

func (Heuristics) Match

func (hs Heuristics) Match(data []byte) []string

Match returns languages identified by the matching rule of the heuristic.

type LanguageInfo

// LanguageInfo holds the per-language metadata fields listed below: naming,
// classification, file associations and editor-related settings.
type LanguageInfo struct {
	// Name is the language name. May contain symbols not safe for use in some filesystems (e.g., `F*`).
	Name string
	// FSName is the filesystem safe name. Will only be set if Name is not safe for use in all filesystems.
	FSName string
	// Type is the language Type. See data.Type for values.
	Type Type
	// Color is the CSS hex color to represent the language. Only used if type is "programming" or "markup".
	Color string
	// Group is the name of the parent language. Languages in a group are counted in the statistics as the parent language.
	Group string
	// Aliases is a slice of additional aliases (implicitly includes name.downcase)
	Aliases []string
	// Extensions is a slice of associated extensions (the first one is considered the primary extension).
	Extensions []string
	// Interpreters is a slice of associated interpreters.
	Interpreters []string
	// Filenames is a slice of filenames commonly associated with the language.
	Filenames []string
	// MimeType (maps to codemirror_mime_type in linguist.yaml) is the string name of the file mime type used for highlighting whenever a file is edited.
	MimeType string
	// TMScope is the TextMate scope that represents this programming language.
	TMScope string
	// AceMode is the name of the Ace Mode used for highlighting whenever a file is edited.
	AceMode string
	// CodeMirrorMode is the name of the CodeMirror Mode used for highlighting whenever a file is edited.
	CodeMirrorMode string
	// Wrap is a boolean flag to enable line wrapping in an editor.
	Wrap bool
	// LanguageID is the Linguist-assigned numeric ID for the language.
	LanguageID int
}

LanguageInfo exposes the data for a language's Linguist YAML entry as a Go struct. See https://github.com/github/linguist/blob/master/lib/linguist/languages.yml

type Type

// Type is the integer-backed category of a language; its values are the
// Type* constants.
type Type int

Type represents a language's type: either data, programming, markup, prose, or unknown.

// Values of Type, numbered from 0 by iota in declaration order.
const (
	// TypeUnknown is the zero value of Type (0).
	TypeUnknown Type = iota
	// TypeData has the value 1.
	TypeData
	// TypeProgramming has the value 2.
	TypeProgramming
	// TypeMarkup has the value 3.
	TypeMarkup
	// TypeProse has the value 4.
	TypeProse
)

Type's values.

func TypeForString

func TypeForString(s string) Type

func (Type) String

func (t Type) String() string

Directories

Path Synopsis
Package rule contains rule-based heuristic implementations.
Package rule contains rule-based heuristic implementations.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL