Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
// DataSourcesInf provides missing data for data_sources table.
//
// The map is keyed by the integer ID of a data source. Each value carries
// hard-coded metadata (titles, UUID, URLs) and, where available, an outlink
// template (OutlinkURL, with a '{}' placeholder) together with an OutlinkID
// function that picks or builds the outlink identifier from a NameInf.
var DataSourcesInf = map[int]DataSourceInf{
	1: {
		Title:          "Catalogue of Life",
		TitleShort:     "Catalogue of Life",
		UUID:           "d4df2968-4257-4ad9-ab81-bedbbfb25e2a",
		HomeURL:        "https://www.catalogueoflife.org/",
		DataURL:        "http://www.catalogueoflife.org/DCA_Export/archive.php",
		IsOutlinkReady: true,
		OutlinkURL:     "https://www.catalogueoflife.org/data/taxon/{}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	2: {
		TitleShort:     "Wikispecies",
		UUID:           "68923690-0727-473c-b7c5-2ae9e601e3fd",
		HomeURL:        "https://species.wikimedia.org/wiki/Main_Page",
		IsOutlinkReady: true,
		DataURL: "http://dumps.wikimedia.org/specieswiki/latest/" +
			"specieswiki-latest-pages-articles.xml.bz2",
		OutlinkURL: "http://species.wikimedia.org/wiki/{}",
		// The outlink ID is the full canonical form with spaces replaced by
		// underscores.
		OutlinkID: func(n NameInf) string { return strings.ReplaceAll(n.CanonicalFull, " ", "_") },
	},
	3: {
		Title:          "Integrated Taxonomic Information System",
		TitleShort:     "ITIS",
		UUID:           "5d066e84-e512-4a2f-875c-0a605d3d9f35",
		HomeURL:        "https://www.itis.gov/",
		DataURL:        "https://www.itis.gov/downloads/itisMySQLTables.tar.gz",
		IsOutlinkReady: true,
		OutlinkURL:     "https://www.itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value={}#null",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	4: {
		Title:          "National Center for Biotechnology Information",
		TitleShort:     "NCBI",
		UUID:           "97d7633b-5f79-4307-a397-3c29402d9311",
		HomeURL:        "https://www.ncbi.nlm.nih.gov/",
		DataURL:        "ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz",
		IsOutlinkReady: true,
		OutlinkURL: "https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?" +
			"mode=Undef&name={}&lvl=0&srchmode=1&keep=1&unlock",
		// The outlink ID is the escaped simple canonical form.
		OutlinkID: func(n NameInf) string { return url.PathEscape(n.Canonical) },
	},
	5: {
		Title:          "Index Fungorum: Species Fungorum",
		TitleShort:     "Index Fungorum",
		UUID:           "af06816a-0b28-4a09-8219-bd1d63289858",
		HomeURL:        "http://www.speciesfungorum.org",
		IsOutlinkReady: true,
		OutlinkURL:     "http://www.indexfungorum.org/Names/NamesRecord.asp?RecordID={}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	8: {
		TitleShort: "IRMNG (old)",
		UUID:       "f8e586aa-876e-4b0a-ab89-da0b4a64c19a",
		HomeURL:    "https://irmng.org/",
	},
	9: {
		TitleShort:     "WoRMS",
		UUID:           "bf077d91-673a-4be4-8af9-76db45d07e98",
		IsOutlinkReady: true,
		HomeURL:        "https://marinespecies.org",
	},
	10: {
		TitleShort: "Freebase",
		UUID:       "bacd21f0-44e0-43e2-914c-70929916f257",
	},
	11: {
		Title:          "Global Biodiversity Information Facility Backbone Taxonomy",
		TitleShort:     "GBIF Backbone Taxonomy",
		UUID:           "eebb6f49-e1a1-4f42-b9d5-050844c893cd",
		IsOutlinkReady: true,
		HomeURL:        "https://www.gbif.org/dataset/d7dddbf4-2cf0-4f39-9b2a-bb099caae36c",
	},
	12: {
		TitleShort:     "EOL",
		UUID:           "dba5f880-a40d-479b-a1ad-a646835edde4",
		HomeURL:        "https://eol.org",
		DataURL:        "https://eol.org/data/provider_ids.csv.gz",
		IsOutlinkReady: true,
		OutlinkURL:     "https://eol.org/pages/{}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	113: {
		Title:      "Zoological names",
		TitleShort: "Zoological names",
	},
	// NOTE(review): "Tansania" in the two entries below may be a misspelling
	// of "Tanzania"; confirm against the titles used by the source datasets
	// before changing these strings.
	117: {
		Title:      "Birds of Tansania",
		TitleShort: "Birds of Tansania",
	},
	119: {
		Title:      "Tansania Plant Specimens",
		TitleShort: "Tansania Plant Specimens",
	},
	142: {
		Title:      "The Clements Checklist of Birds of the World",
		TitleShort: "The Clements Checklist of Birds",
	},
	147: {
		TitleShort: "VASCAN",
	},
	149: {
		Title:      "Ocean Biodiversity Information System",
		TitleShort: "OBIS",
	},
	// NOTE(review): this UUID is identical to the UUID of Freebase (ID 10);
	// one of the two looks like a copy-paste error. Confirm the correct value.
	155: {
		TitleShort:     "FishBase",
		UUID:           "bacd21f0-44e0-43e2-914c-70929916f257",
		IsOutlinkReady: true,
		HomeURL:        "https://www.fishbase.in/home.htm",
	},
	165: {
		TitleShort: "Tropicos",
		Description: "The Tropicos database links over 1.33M scientific names " +
			"with over 4.87M specimens and over 685K digital images. The data " +
			"includes over 150K references from over 52.6K publications offered " +
			"as a free service to the world’s scientific community.",
		IsOutlinkReady: true,
		OutlinkURL:     "https://tropicos.org/name/{}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	167: {
		TitleShort:     "IPNI",
		UUID:           "6b3905ce-5025-49f3-9697-ddd5bdfb4ff0",
		HomeURL:        "https://www.ipni.org/",
		IsOutlinkReady: true,
		OutlinkURL:     "https://www.ipni.org/n/{}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	168: {
		TitleShort:     "ION",
		UUID:           "1137dfa3-5b8c-487d-b497-dc0938605864",
		HomeURL:        "http://organismnames.com/",
		IsOutlinkReady: true,
		OutlinkURL:     "http://www.organismnames.com/details.htm?lsid={}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	// NOTE(review): url.QueryEscape encodes spaces as '+', while the '{}'
	// placeholder here sits in the URL path; the similar path-style outlink
	// of MCZbase (ID 186) uses url.PathEscape. Confirm this is intended.
	170: {
		TitleShort:     "Arctos",
		UUID:           "eea8315d-a244-4625-859a-226675622312",
		HomeURL:        "https://arctosdb.org/",
		IsOutlinkReady: true,
		OutlinkURL:     "https://arctos.database.museum/name/{}",
		OutlinkID:      func(n NameInf) string { return url.QueryEscape(n.Canonical) },
	},
	172: {
		TitleShort:     "PaleoBioDB",
		UUID:           "fad9970e-c358-4e1b-8cc3-f9ad2582751f",
		HomeURL:        "https://paleobiodb.org/#/",
		IsOutlinkReady: true,
	},
	173: {
		TitleShort:     "The Reptile DataBase",
		UUID:           "c24e0905-4980-4e1d-aff2-ee0ef54ea1f8",
		HomeURL:        "http://reptile-database.org/",
		IsOutlinkReady: true,
	},
	174: {
		TitleShort:     "Mammal Species of the World",
		UUID:           "464dafec-1037-432d-8449-c0b309e0a030",
		HomeURL:        "https://www.departments.bucknell.edu/biology/resources/msw3/",
		DataURL:        "https://www.departments.bucknell.edu/biology/resources/msw3/export.asp",
		IsOutlinkReady: true,
		OutlinkURL:     "https://www.departments.bucknell.edu/biology/resources/msw3/browse.asp?s=y&id={}",
		// Unlike most entries, the outlink ID here is the local ID.
		OutlinkID: func(n NameInf) string { return n.LocalID },
	},
	175: {
		TitleShort:     "BirdLife International",
		UUID:           "b1d8de7a-ab96-455f-acd8-f3fff2d7d169",
		HomeURL:        "http://www.birdlife.org/",
		DataURL:        "http://datazone.birdlife.org/species/taxonomy",
		IsOutlinkReady: true,
		OutlinkURL:     "http://datazone.birdlife.org/species/results?thrlev1=&thrlev2=&kw={}",
		OutlinkID:      func(n NameInf) string { return url.PathEscape(n.Canonical) },
	},
	179: {
		TitleShort:     "Open Tree of Life",
		UUID:           "e10865e2-cdd9-4f97-912f-08f3d5ef49f7",
		IsOutlinkReady: true,
		HomeURL:        "https://tree.opentreeoflife.org/",
		DataURL:        "https://files.opentreeoflife.org/ott/",
	},
	181: {
		TitleShort:     "IRMNG",
		UUID:           "417454fa-a0a1-4b9c-814d-edc0f4f25ad8",
		IsOutlinkReady: true,
		HomeURL:        "https://irmng.org/",
		DataURL:        "https://irmng.org/export/",
	},
	183: {
		TitleShort:     "Sherborn Index Animalium",
		UUID:           "05ad6ca2-fc37-47f4-983a-72e535420e28",
		IsOutlinkReady: true,
		HomeURL:        "https://www.sil.si.edu/DigitalCollections/indexanimalium/taxonomicnames/",
		DataURL: "https://www.sil.si.edu/DigitalCollections/indexanimalium/" +
			"Datasets/2006.01.06.TaxonomicData.csv",
	},
	184: {
		TitleShort:     "ASM Mammal Diversity DB",
		UUID:           "94270cdd-5424-4bb1-8324-46ccc5386dc7",
		HomeURL:        "https://mammaldiversity.org/",
		DataURL:        "https://mammaldiversity.org/",
		IsOutlinkReady: true,
		OutlinkURL:     "https://mammaldiversity.org/species-account/species-id={}",
		// The outlink ID here is the record ID of the accepted name.
		OutlinkID: func(n NameInf) string { return n.AcceptedRecordID },
	},
	185: {
		TitleShort:     "IOC World Bird List",
		UUID:           "6421ffec-38e3-40fb-a6d9-af27238a47a1",
		IsOutlinkReady: true,
		HomeURL:        "https://www.worldbirdnames.org/",
		DataURL:        "https://www.worldbirdnames.org/ioc-lists/master-list-2/",
	},
	186: {
		TitleShort:     "MCZbase",
		UUID:           "c79d055b-211b-40de-8e27-618011656265",
		IsOutlinkReady: true,
		HomeURL:        "https://mczbase.mcz.harvard.edu/",
		OutlinkURL:     "https://mczbase.mcz.harvard.edu/name/{}",
		OutlinkID:      func(n NameInf) string { return url.PathEscape(n.Canonical) },
	},
	187: {
		TitleShort:     "Clements' Birds of the World",
		UUID:           "577c0b56-4a3c-4314-8724-14b304f601de",
		IsOutlinkReady: true,
		HomeURL:        "https://www.birds.cornell.edu/clementschecklist/",
		DataURL:        "https://www.birds.cornell.edu/clementschecklist/download/",
	},
	188: {
		TitleShort:     "American Ornithological Society",
		UUID:           "91d38806-8435-479f-a18d-705e5cb0767c",
		HomeURL:        "https://americanornithology.org/",
		IsOutlinkReady: true,
		DataURL:        "https://checklist.americanornithology.org/taxa.csv",
		OutlinkURL:     "https://checklist.americanornithology.org/taxa/{}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
	189: {
		TitleShort:     "Howard & Moore Birds of the World",
		UUID:           "85023fe5-bf2a-486b-bdae-3e61cefd41fd",
		HomeURL:        "https://www.howardandmoore.org/",
		IsOutlinkReady: true,
		DataURL:        "https://www.howardandmoore.org/howard-and-moore-database/",
	},
	194: {
		TitleShort:     "Plazi",
		UUID:           "68938dc9-b93d-43bc-9d51-5c2a632f136f",
		HomeURL:        "https://www.plazi.org/",
		IsOutlinkReady: true,
		DataURL:        "http://tb.plazi.org/GgServer/xml.rss.xml",
		OutlinkURL:     "http://tb.plazi.org/GgServer/html/{}",
		OutlinkID:      func(n NameInf) string { return n.LocalID },
	},
	195: {
		TitleShort:     "AlgaeBase",
		UUID:           "a5869bfb-7cbf-40f2-88d3-962922dac43f",
		HomeURL:        "https://www.algaebase.org/",
		IsOutlinkReady: true,
		OutlinkURL:     "https://www.algaebase.org/search/species/detail/?species_id={}",
		OutlinkID:      func(n NameInf) string { return n.RecordID },
	},
}
DataSourcesInf provides missing data for data_sources table.
Functions ¶
func QuoteString ¶
QuoteString makes a string value compatible with SQL syntax by wrapping it in quotes and escaping internal quotes.
Types ¶
type Canonical ¶
// Canonical is a 'simple' canonical form.
type Canonical struct {
	// ID is a UUID v5 generated for the simple canonical form. It is the
	// primary key and is never auto-incremented.
	ID string `gorm:"type:uuid;primary_key;auto_increment:false"`

	// Name is the canonical name-string. It is indexed (canonical_name) and
	// must not be null.
	Name string `gorm:"type:varchar(255);index:canonical_name;not null"`
}
Canonical is a 'simple' canonical form.
type CanonicalData ¶
// CanonicalData provides data about various canonical forms of a
// name-string.
type CanonicalData struct {
	// ID is presumably the UUID v5 of the simple canonical form
	// (cf. Canonical.ID) — TODO confirm.
	ID string

	// Value is presumably the simple canonical name-string — TODO confirm.
	Value string

	// FullID is presumably the UUID v5 of the 'full' canonical form
	// (cf. CanonicalFull.ID) — TODO confirm.
	FullID string

	// FullValue is presumably the 'full' canonical name-string — TODO confirm.
	FullValue string

	// StemID is presumably the UUID v5 of the stemmed canonical form
	// (cf. CanonicalStem.ID) — TODO confirm.
	StemID string

	// StemValue is presumably the stemmed canonical name-string — TODO confirm.
	StemValue string

	// Cardinality is presumably the number of elements in the name
	// (cf. NameString.Cardinality) — TODO confirm.
	Cardinality int
}
CanonicalData provides data about various canonical forms of a name-string.
type CanonicalFull ¶
// CanonicalFull is a full canonical form.
type CanonicalFull struct {
	// ID is a UUID v5 generated for the 'full' canonical form (with
	// infraspecific ranks and hybrid signs for named hybrids). It is the
	// primary key and is never auto-incremented.
	ID string `gorm:"type:uuid;primary_key;auto_increment:false"`

	// Name is the full canonical name-string. It must not be null.
	Name string `gorm:"type:varchar(255);not null"`
}
CanonicalFull is a full canonical form.
type CanonicalStem ¶
// CanonicalStem is a stemmed derivative of a simple canonical form.
type CanonicalStem struct {
	// ID is a UUID v5 for the stemmed derivative of a simple canonical form.
	// It is the primary key and is never auto-incremented.
	ID string `gorm:"type:uuid;primary_key;auto_increment:false"`

	// Name is the stemmed canonical name-string. It must not be null.
	Name string `gorm:"type:varchar(255);not null"`
}
CanonicalStem is a stemmed derivative of a simple canonical form.
type DataSource ¶
// DataSource describes metadata of a dataset.
type DataSource struct {
	// ID is a hard-coded ID that corresponds to historic IDs given by old
	// versions of resolver.
	ID int `gorm:"type:smallint;primary_key;auto_increment:false"`

	// UUID is assigned to the resource during creation. UUID is not displayed
	// to users, but is important for data import from DwCA files. It defaults
	// to the all-zero UUID.
	UUID string `gorm:"type:uuid;default:'00000000-0000-0000-0000-000000000000'"`

	// Title is the long title; it tries to follow the name of the dataset
	// given by its creators.
	Title string `gorm:"type:varchar(255)"`

	// TitleShort is a shortened/abbreviated title.
	TitleShort string `gorm:"type:varchar(50)"`

	// Version of the dataset. Some datasets have versions.
	Version string `gorm:"type:varchar(50)"`

	// RevisionDate is the time when the dataset was created.
	// Follows a format of a 'YYYY-MM-DD' || 'YYYY-MM' || 'YYYY'.
	RevisionDate string

	// DOI of the dataset (if exists).
	DOI string `gorm:"type:varchar(50)"`

	// Citation is a reference that can be used to cite the dataset.
	Citation string

	// Authors of the dataset.
	Authors string

	// Description of the dataset. Might include unstructured metainformation
	// as well.
	Description string

	// WebsiteURL is the home URL for the dataset.
	WebsiteURL string `gorm:"type:varchar(255)"`

	// DataURL is the original URL used to download the dataset.
	DataURL string `gorm:"type:varchar(255)"`

	// OutlinkURL is a template for creation of an outlink for a dataset
	// record. It contains a placeholder '{}' for the record's OutlinkID.
	OutlinkURL string

	// IsOutlinkReady means that the data-source has enough metainformation,
	// URLs, harvests to be generally good to be pointed out as a 'mature'
	// data-source at gnames. Resources that were harvested too long ago
	// or do not have WebsiteURL/OutlinkURLs would normally have this flag set
	// to false.
	IsOutlinkReady bool

	// IsCurated is true if a dataset undergoes a significant manual curation.
	IsCurated bool

	// IsAutoCurated is true if a dataset undergoes a significant automatic
	// curation by scripts.
	IsAutoCurated bool

	// RecordCount is the number of records in a dataset.
	RecordCount int

	// UpdatedAt is the timestamp of the last import of the dataset. The
	// timestamp usually does not correspond to when the dataset was created.
	UpdatedAt time.Time `gorm:"type:timestamp without time zone"`
}
DataSource describes metadata of a dataset.
type DataSourceInf ¶
// DataSourceInf provides fields associated with a DataSource. Values of this
// type are the hard-coded entries of the DataSourcesInf map.
type DataSourceInf struct {
	// Title is the long title of the dataset.
	Title string

	// TitleShort is the shortened/abbreviated title of the dataset.
	TitleShort string

	// Description of the dataset.
	Description string

	// UUID of the dataset.
	UUID string

	// HomeURL is the home page of the dataset; presumably it populates
	// DataSource.WebsiteURL — TODO confirm.
	HomeURL string

	// DataURL is the URL from which the dataset is downloaded.
	DataURL string

	// IsOutlinkReady marks the data-source as generally 'mature'
	// (see DataSource.IsOutlinkReady).
	IsOutlinkReady bool

	// OutlinkURL is a template for an outlink to a dataset record. It
	// contains a placeholder '{}' for the record's outlink ID.
	OutlinkURL string

	// OutlinkID picks or builds the outlink ID of a record from its NameInf.
	// It is nil for data sources that have no outlink template.
	OutlinkID func(n NameInf) string
}
DataSourceInf provides fields associated with a DataSource
type NameInf ¶
// NameInf provides fields associated with a name-string in a particular data
// source. It is the input of DataSourceInf.OutlinkID functions.
type NameInf struct {
	// RecordID is presumably the unique ID of the record in the data source
	// (cf. NameStringIndex.RecordID) — TODO confirm.
	RecordID string

	// AcceptedRecordID is presumably the RecordID of the currently accepted
	// name (cf. NameStringIndex.AcceptedRecordID) — TODO confirm.
	AcceptedRecordID string

	// LocalID is presumably the local id from the dataset
	// (cf. NameStringIndex.LocalID) — TODO confirm.
	LocalID string

	// GlobalID is presumably the global id from the dataset
	// (cf. NameStringIndex.GlobalID) — TODO confirm.
	GlobalID string

	// Canonical is presumably the simple canonical form of the name
	// — TODO confirm.
	Canonical string

	// CanonicalFull is presumably the full canonical form of the name
	// — TODO confirm.
	CanonicalFull string
}
NameInf provides fields associated with a name-string in a particular data source.
type NameString ¶
// NameString is a name-string extracted from a dataset.
type NameString struct {
	// ID is a UUID v5 generated from the name-string using
	// DNS:"globalnames.org" as a seed.
	ID string `gorm:"type:uuid;primary_key;auto_increment:false"`

	// Name is the name-string with authorships and annotations as it is given
	// by a dataset. Sometimes an authorship is concatenated with a name-string
	// by our import scripts.
	Name string `gorm:"type:varchar(255);not null"`

	// Year is the year when a name was published.
	Year sql.NullInt16 `gorm:"type:int"`

	// Cardinality is the number of elements in a 'classic' Linnaean name:
	// 0 - unknown, not available, 1 - uninomial, 2 - binomial,
	// 3 - trinomial etc.
	// Cardinality can be used to filter out surrogates and hybrid formulas --
	// they would have cardinality 0.
	Cardinality sql.NullInt32 `gorm:"type:int"`

	// CanonicalID is a UUID v5 generated for the simple canonical form.
	CanonicalID sql.NullString `gorm:"type:uuid;index:canonical"`

	// CanonicalFullID is a UUID v5 generated for the 'full' canonical form
	// (with infraspecific ranks and hybrid signs for named hybrids).
	CanonicalFullID sql.NullString `gorm:"type:uuid;index:canonical_full"`

	// CanonicalStemID is a UUID v5 for the stemmed derivative of a simple
	// canonical form.
	CanonicalStemID sql.NullString `gorm:"type:uuid;index:canonical_stem"`

	// Virus indicates if a name-string seems to be virus-like.
	Virus bool `gorm:"type:bool"`

	// Bacteria is true if parser marks a name as from the Bacterial Code.
	Bacteria bool `gorm:"type:bool;not null;default:false"`

	// Surrogate indicates if a name-string is a surrogate name.
	Surrogate bool `gorm:"type:bool"`

	// ParseQuality is a numeric representation of the quality of parsing.
	// 0 - no parse, 1 - clear parse, 2 - some problems, 3 - big problems.
	ParseQuality int `gorm:"type:int;not null;default:0"`
}
NameString is a name-string extracted from a dataset.
type NameStringIndex ¶
// NameStringIndex describes the relation of a name-string to a dataset.
// DataSourceID, RecordID and NameStringID together form the primary key.
type NameStringIndex struct {
	// DataSourceID is the dataset ID.
	DataSourceID int `gorm:"primary_key;auto_increment:false"`

	// RecordID is a unique ID for the record. We do our best to get it from
	// the record IDs, either global or local, but if all fails, the ID is
	// assigned by gnames in a format of 'gn_{int}'.
	RecordID string `gorm:"type:varchar(255);primary_key;auto_increment:false"`

	// NameStringID is the UUID5 of a full name-string from the dataset.
	NameStringID string `gorm:"type:uuid;index:name_string_id;primary_key;auto_increment:false"`

	// OutlinkID is the ID used to create an outlink.
	OutlinkID string `gorm:"type:varchar(255)"`

	// GlobalID is the global ID from the dataset.
	GlobalID string `gorm:"type:varchar(255)"`

	// LocalID is the local ID from the dataset.
	LocalID string `gorm:"type:varchar(255)"`

	// CodeID is the nomenclatural code ID.
	// 0 - no info, 1 - ICZN, 2 - ICN, 3 - ICNP, 4 - ICTV.
	CodeID int `gorm:"type:smallint"`

	// Rank is the rank of the name.
	Rank string `gorm:"type:varchar(255)"`

	// AcceptedRecordID is the RecordID of a currently accepted name-string
	// for the taxon.
	AcceptedRecordID string `gorm:"type:varchar(255);index:accepted_record_id"`

	// Classification is a pipe-delimited string containing the classification
	// supplied with the resource.
	Classification string

	// ClassificationIDs are the RecordIDs of the classification elements
	// (if given).
	ClassificationIDs string

	// ClassificationRanks are the ranks of the classification elements.
	ClassificationRanks string
}
NameStringIndex describes the relation of a name-string to a dataset.
type ParsedData ¶ added in v0.2.0
type PgDB ¶
func (PgDB) NewDb ¶
NewDb creates a database handler from the standard sql package. We use it to speed up import of the data.
type Rebuild ¶
Rebuild provides configuration for database rebuilding process
func NewRebuild ¶
NewRebuild creates new Rebuild structure for rebuilding process.
func (Rebuild) CreateWords ¶ added in v0.2.0
func (rb Rebuild) CreateWords()
func (Rebuild) RemoveOrphans ¶ added in v0.2.0
func (rb Rebuild) RemoveOrphans()
func (Rebuild) UploadDataSources ¶
UploadDataSources populates data_sources table with data.
func (Rebuild) UploadNameString ¶
UploadNameString constructs data for name_strings, canonicals, canonical_fulls, canonical_stems tables and uploads these data to the database.
func (Rebuild) UploadNameStringIndices ¶
func (rb Rebuild) UploadNameStringIndices()
UploadNameStringIndices constructs data for name_string_indices table and uploads them to the database.
func (Rebuild) VerificationView ¶ added in v0.2.0
func (rb Rebuild) VerificationView()
VerificationView creates data for a materialized view.
type Word ¶ added in v0.2.0
// Word is a word from a name-string. ID and Normalized together form the
// primary key.
type Word struct {
	// ID is a UUID5 generated from the modified word and its type (converted
	// to an integer) joined together with a pipe.
	// For example: "alb|2"
	ID string `gorm:"primary_key;type:uuid;auto_increment:false"`

	// Normalized is the word normalized by GNparser. This field is used
	// for sorting results.
	Normalized string `gorm:"type:varchar(255);primary_key;auto_increment:false"`

	// Modified is a heavy-normalized word. This field is used for matching.
	Modified string `gorm:"type:varchar(255);not null;index:words_modified"`

	// TypeID is the integer representation of parsed.WordType
	// from GNparser.
	TypeID int
}
Word is a word from a name-string.
type WordNameString ¶ added in v0.2.0
// WordNameString is the meaning of a word in a name-string. WordID and
// NameStringID together form the primary key.
type WordNameString struct {
	// WordID is the identifier of a word.
	WordID string `gorm:"primary_key;type:uuid;auto_increment:false"`

	// NameStringID is UUID5 of a full name-string from the dataset.
	NameStringID string `gorm:"primary_key;type:uuid;auto_increment:false"`

	// CanonicalID is UUID5 of a simple canonical form of a name.
	// The tag uses "not null" (with a space), the spelling used by the other
	// models of this package; "not_null" is not a setting gorm recognizes, so
	// the constraint was silently dropped.
	CanonicalID string `gorm:"type:uuid;not null"`
}
WordNameString is the meaning of a word in a name-string.