dbapi

package

v0.4.1 Latest Latest Go to latest Published: Feb 6, 2026 License: GPL-3.0 Imports: 23 Imported by: 5

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/stts-se/pronlex

Links

Open Source Insights

Documentation ¶

Overview ¶

Package dbapi contains code wrapped around an SQL(ite3) DB. It is used for inserting, updating and retrieving lexical entries from a pronunciation lexicon database. A lexical entry is represented by the dbapi.Entry struct, that mirrors entries of the entry database table, along with associated tables such as transcription and lemma.

Package dbapi contains code wrapped around SQL(ite3) and MariaDB. It is used for inserting, updating and retrieving lexical entries from a pronunciation lexicon database. A lexical entry is represented by the lex.Entry struct, that mirrors entries of the entry database table, along with associated tables such as transcription and lemma.

Index ¶

Constants
Variables
func ImportMariaDBLexiconFile(db *sql.DB, lexiconName lex.LexName, logger Logger, lexiconFileName string, ...) error
func ImportSqliteLexiconFile(db *sql.DB, lexiconName lex.LexName, logger Logger, lexiconFileName string, ...) error
func RemoveEmptyStrings(ss []string) []string
func Sqlite3WithRegex()
func ToLower(ss []string) []string
func ValidateLexiconFile(logger Logger, lexiconFileName string, validator *validation.Validator, ...) error
type DBEngine
- func (i DBEngine) String() string
type DBIF
type DBMQuery
type DBManager
- func NewDBManager(engine DBEngine) (*DBManager, error)
- func NewMariaDBManager() *DBManager
- func NewSqliteDBManager() *DBManager
- func (dbm *DBManager) AddDB(dbRef lex.DBRef, db *sql.DB) error
- func (dbm *DBManager) CloseDB(dbRef lex.DBRef) error
- func (dbm *DBManager) ContainsDB(dbRef lex.DBRef) bool
- func (dbm *DBManager) DBExists(dbLocation string, dbRef lex.DBRef) (bool, error)
- func (dbm *DBManager) DefineDB(dbLocation string, dbRef lex.DBRef) error
- func (dbm *DBManager) DefineLexicon(lexRef lex.LexRef, symbolSetName string, locale string) error
- func (dbm *DBManager) DefineLexicons(dbRef lex.DBRef, symbolSetName string, locale string, lexes ...lex.LexName) error
- func (dbm *DBManager) DeleteEntry(entryID int64, lexRef lex.LexRef) (int64, error)
- func (dbm *DBManager) DeleteLexicon(lexRef lex.LexRef) error
- func (dbm *DBManager) DropDB(dbLocation string, dbRef lex.DBRef) error
- func (dbm DBManager) Engine() DBEngine
- func (dbm *DBManager) EntryCount(lexRef lex.LexRef) (int64, error)
- func (dbm *DBManager) FirstTimePopulateDBCache(dbLocation string) error
- func (dbm *DBManager) GetLexicon(lexRef lex.LexRef) (lex.LexRefWithInfo, error)
- func (dbm *DBManager) GetSchemaVersion(dbRef lex.DBRef) (string, error)
- func (dbm *DBManager) ImportLexiconFile(lexRef lex.LexRef, logger Logger, lexiconFileName string, ...) error
- func (dbm *DBManager) InsertEntries(lexRef lex.LexRef, entries []lex.Entry) ([]int64, error)
- func (dbm *DBManager) LexiconExists(lexRef lex.LexRef) (bool, error)
- func (dbm *DBManager) LexiconStats(lexRef lex.LexRef) (LexStats, error)
- func (dbm *DBManager) ListAllEntryStatuses(lexRef lex.LexRef) ([]string, error)
- func (dbm *DBManager) ListCommentLabels(lexRef lex.LexRef) ([]string, error)
- func (dbm *DBManager) ListCurrentEntryStatuses(lexRef lex.LexRef) ([]string, error)
- func (dbm *DBManager) ListCurrentEntryStatusesWithFreq(lexRef lex.LexRef) (map[string]int, error)
- func (dbm *DBManager) ListCurrentEntryUsers(lexRef lex.LexRef) ([]string, error)
- func (dbm *DBManager) ListCurrentEntryUsersWithFreq(lexRef lex.LexRef) (map[string]int, error)
- func (dbm *DBManager) ListDBNames() ([]lex.DBRef, error)
- func (dbm *DBManager) ListIDs(lexRef lex.LexRef) ([]int64, error)
- func (dbm *DBManager) ListLexicons() ([]lex.LexRefWithInfo, error)
- func (dbm *DBManager) Locale(lexRef lex.LexRef) (string, error)
- func (dbm DBManager) Lock()
- func (dbm *DBManager) LookUp(q DBMQuery, out lex.EntryWriter) error
- func (dbm *DBManager) LookUpIntoMap(q DBMQuery) (map[lex.DBRef][]lex.Entry, error)
- func (dbm *DBManager) LookUpIntoSlice(q DBMQuery) ([]lex.Entry, error)
- func (dbm *DBManager) MoveNewEntries(dbRef lex.DBRef, fromLex, toLex lex.LexName, newSource, newStatus string) (MoveResult, error)
- func (dbm *DBManager) OpenDB(dbLocation string, dbRef lex.DBRef) error
- func (dbm DBManager) RLock()
- func (dbm DBManager) RUnlock()
- func (dbm *DBManager) RemoveDB(dbRef lex.DBRef) error
- func (dbm DBManager) Unlock()
- func (dbm *DBManager) UpdateEntry(e lex.Entry) (lex.Entry, bool, error)
- func (dbm *DBManager) UpdateValidation(e lex.Entry) error
- func (dbm *DBManager) Validate(lexRef lex.LexRef, logger Logger, vd validation.Validator, q Query) (ValStats, error)
- func (dbm *DBManager) ValidationStats(lexRef lex.LexRef) (ValStats, error)
type LatestUpdatesPerSource
type LexStats
type Logger
type MoveResult
type PrintMode
type Query
- func NewQuery() Query
- func (q Query) Empty() bool
type QueryStats
type SilentLogger
- func (l SilentLogger) LogInterval() int
- func (l SilentLogger) Progress(s string)
- func (l SilentLogger) Write(s string)
type StatusFreq
type StderrLogger
- func (l StderrLogger) LogInterval() int
- func (l StderrLogger) Progress(s string)
- func (l StderrLogger) Write(s string)
type StdoutLogger
- func (l StdoutLogger) LogInterval() int
- func (l StdoutLogger) Progress(s string)
- func (l StdoutLogger) Write(s string)
type Symbol
type ValStats
type WebSockLogger
- func NewWebSockLogger(websock *websocket.Conn) WebSockLogger
- func (l WebSockLogger) LogInterval() int
- func (l WebSockLogger) Progress(msg string)
- func (l WebSockLogger) Write(msg string)

Constants ¶

View Source

// SchemaVersion is the version string of the database schema structure.
// It is spliced into the schema definitions when the SchemaVersion table is
// populated, and is updated manually whenever the schema changes.
const SchemaVersion = "3.1"

SchemaVersion defines the version of the schema structure. It is used for validating databases against the current version number. It will be updated manually when the structure of the schema/database is changed. Versions with the same prefix (e.g., 3 and 3.1) are compatible.

View Source

// SqliteSchema is the SQL definition of the lexicon database for SQLite:
// tables, indexes and triggers, as one multi-statement script.
//
// The schema version is written to the SchemaVersion table as a quoted SQL
// string literal. Left unquoted, the value would be parsed as a number
// before being stored in the text column, so a version such as "3.10" would
// be stored as "3.1", and a version such as "3.1.1" would be a syntax error.
const SqliteSchema = `

-- TODO: Remove!
--DROP TABLE IF EXISTS SchemaVersion, Lexicon, Entry, EntryComment, Lemma2Entry, Lemma, Transcription, EntryTag, EntryValidation, EntryStatus;

-- To keep track of the version of this schema
CREATE TABLE SchemaVersion (name varchar(255) not null);

INSERT INTO SchemaVersion VALUES ('` + SchemaVersion + `');

-- Each lexical entry belongs to a lexicon.
-- The Lexicon table defines a lexicon through a unique name, along with the name a of symbol set and a locale
CREATE TABLE Lexicon (
    name varchar(128) not null,
    symbolSetName varchar(128) not null,
    locale varchar(128) not null,
    id integer not null primary key autoincrement
  );
CREATE UNIQUE INDEX idx1e0404a1 on Lexicon (name);
CREATE UNIQUE INDEX namesymset on Lexicon (name, symbolSetName);

-- Symbol set handling moved to file based solution
-- A symbol set is the definition of allowed symbols in a lexicons phonetical transcriptions
-- CREATE TABLE Symbolset (
    -- description varchar(128),
    -- description text,
    -- symbol varchar(128) not null,
    -- id integer not null primary key autoincrement,
    -- category varchar(128) not null,
    -- lexiconId integer not null,
    -- ipa varchar(128)
--   );
-- CREATE INDEX idx37380686 on Symbolset (symbol);
-- CREATE UNIQUE INDEX idx8bc90a52 on Symbolset (lexiconId,symbol);

-- Lemma forms, or stems, are uninflected (theoretical, one might say) forms of words
CREATE TABLE Lemma (
    reading varchar(128) not null,
    id integer not null primary key autoincrement,
    paradigm varchar(128),
    -- strn varchar(128) not null
    strn text not null
  );
CREATE INDEX idx21d604f4 on Lemma (reading);
CREATE INDEX idx273f055f on Lemma (paradigm);
CREATE INDEX idx149303e1 on Lemma (strn);
CREATE INDEX lemidstrn on Lemma (id, strn);
CREATE UNIQUE INDEX idx407206e8 on Lemma (strn,reading);
--CREATE TABLE SurfaceForm (
--    id integer not null primary key autoincrement,
--    strn varchar(128) not null
--  );
--CREATE UNIQUE INDEX idx35390652 on SurfaceForm (strn);

-- The actual lexical entries live in this table.
-- Each entry is linked to a single lexicon, and may have one or more 
-- phonetic transcriptions, found in their own table.
CREATE TABLE Entry (
    -- wordParts varchar(128),
    wordParts text,
    label varchar(128), -- TODO What's this?!
    id integer not null primary key autoincrement,
    language varchar(128) not null,
    -- strn varchar(128) not null,
    strn text not null,
    lexiconId integer not null,
    partOfSpeech varchar(128),
    morphology varchar(128),
    preferred integer not null default 0, -- TODO Why doesn't it work when changing integer -> boolean? 
foreign key (lexiconId) references Lexicon(id));
CREATE INDEX idx28d70584 on Entry (language);
CREATE INDEX idx15890407 on Entry (strn);
CREATE INDEX entrylexid ON Entry (lexiconId);
CREATE INDEX entrypref ON Entry (preferred);
CREATE INDEX idx4a250778 on Entry (strn,language);
CREATE INDEX estrnpref on Entry (strn,preferred);
CREATE INDEX idid on Entry (id, lexiconId);


-- CREATE TABLE Tag (
--     strn text not null,
--     id integer not null primary key autoincrement,
-- );
-- CREATE UNIQUE INDEX tagindex ON Tag (strn);

-- Entry tag is a string used to distinguish between homographs.
-- Unique for an entry of a specific word form, but not for different
-- word forms. NOTE: This can be further normalized into a separate Tag
-- table, for reusable tags.
CREATE TABLE EntryTag (
    -- id integer not null primary key autoincrement,
    entryId integer not null,
    tag text not null,
    wordForm text, -- not null,
    FOREIGN KEY (entryId) REFERENCES Entry(id) ON DELETE CASCADE
);

-- A single tag per entry
CREATE UNIQUE INDEX tageid ON EntryTag(entryId);
CREATE UNIQUE INDEX tagentwf ON EntryTag(tag, wordForm);

-- Pick the entry word form from the Entry table
CREATE TRIGGER entryTagTrigger AFTER INSERT ON entryTag
   BEGIN
     UPDATE EntryTag SET wordForm = (select strn from entry where id = entryid) WHERE EntryTag.entryId = NEW.entryId;
   END;

CREATE TRIGGER entryTagTrigger2 AFTER UPDATE ON entryTag
   BEGIN
     UPDATE EntryTag SET wordForm = (select strn from entry where id = entryid) WHERE EntryTag.entryId = NEW.entryId;
   END;


CREATE TABLE EntryComment (
    id integer not null primary key autoincrement,
    entryId integer not null,
    source text,
    label text not null,
    comment text, -- not null,
    -- Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP not null,
    FOREIGN KEY (entryId) REFERENCES Entry(id) ON DELETE CASCADE
);

CREATE INDEX cmtlabelndx ON EntryComment(label); 
CREATE INDEX cmtsrcndx ON EntryComment(source); 


-- Validiation results of entries
CREATE TABLE EntryValidation (
    id integer not null primary key autoincrement,
    entryid integer not null,
    level varchar(128) not null,
    name varchar(128) not null,
    -- message varchar(128) not null,
    message text not null,
    Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP not null,
    foreign key (entryId) references Entry(id) on delete cascade);
CREATE INDEX evallev ON EntryValidation(level);
CREATE INDEX evalnam ON EntryValidation(name);
CREATE INDEX entvalEid ON EntryValidation(entryId); 
CREATE INDEX identvalEid ON EntryValidation(id,entryId); 

-- Status of entries
CREATE TABLE EntryStatus (
    name varchar(128) not null,
    source varchar(128) not null,
    entryId integer not null,
    Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP not null,
    current boolean default 1 not null,
    id integer not null primary key autoincrement,
    UNIQUE(entryId,id),
    foreign key (entryId) references Entry(id) on delete cascade);
CREATE INDEX esn ON EntryStatus (name);
CREATE INDEX ess ON EntryStatus (source);
CREATE INDEX esc ON EntryStatus (current);
CREATE INDEX esceid ON EntryStatus (entryId);
CREATE INDEX entryidcurrent ON EntryStatus (entryId, current);
CREATE UNIQUE INDEX eseii ON EntryStatus  (id, entryId);
CREATE UNIQUE INDEX eseiicurr ON EntryStatus  (id, entryId, current);
CREATE UNIQUE INDEX idcurr ON EntryStatus  (id, current);

CREATE TABLE Transcription (
    entryId integer not null,
    preference int,
    label varchar(128),
    -- symbolSetCode varchar(128) not null,
    id integer not null primary key autoincrement,
    language varchar(128) not null,
    -- strn varchar(128) not null,
    strn text not null,
    sources TEXT not null,
foreign key (entryId) references Entry(id) on delete cascade);
CREATE INDEX traeid ON Transcription (entryId);
CREATE INDEX idtraeid ON Transcription (id, entryId);

-- CREATE TABLE TranscriptionStatus (
--    name varchar(128) not null,
--    source varchar(128) not null,
--    timestamp timestamp not null,
--    transcriptionId integer not null,
--    id integer not null primary key autoincrement,
-- foreign key (transcriptionId) references Transcription(id) on delete cascade);
-- CREATE INDEX nizze ON TranscriptionStatus (transcriptionId); 

-- Linking table between a lemma form and its different surface forms 
CREATE TABLE Lemma2Entry (
    entryId bigint not null,
    lemmaId bigint not null,
unique(lemmaId,entryId),
foreign key (entryId) references Entry(id) on delete cascade,
foreign key (lemmaId) references Lemma(id) on delete cascade);
--CREATE INDEX l2eind1 on Lemma2Entry (entryId);
CREATE INDEX l2eind2 on Lemma2Entry (lemmaId);
CREATE UNIQUE INDEX l2euind on Lemma2Entry (lemmaId,entryId);
CREATE UNIQUE INDEX idx46cf073d on Lemma2Entry (entryId);

-- CREATE TABLE SurfaceForm2Entry (
--    entryId bigint not null,
--    surfaceFormId bigint not null,
-- unique(surfaceFormId,entryId));

-- Triggers to ensure only one preferred = 1 per orthographic word
-- When a new entry is added, where preferred is not 0, all other entries for 
-- the same orthographic word (entry.strn), will have the preferred field set to 0.
-- CREATE TRIGGER insertPref BEFORE INSERT ON ENTRY
--   BEGIN
--     UPDATE entry SET preferred = 0 WHERE strn = NEW.strn AND NEW.preferred <> 0 AND lexiconid = NEW.lexiconid;
--   END;
-- CREATE TRIGGER updatePref BEFORE UPDATE ON ENTRY
--   BEGIN
--     UPDATE entry SET preferred = 0 WHERE strn = NEW.strn AND NEW.preferred <> 0 AND lexiconid = NEW.lexiconid;
--   END;

-- Triggers to ensure that there are only one entry status per entry
CREATE TRIGGER insertEntryStatus BEFORE INSERT ON ENTRYSTATUS
  BEGIN 
    UPDATE entrystatus SET current = 0 WHERE entryid = NEW.entryid AND NEW.current <> 0;
  END;
 CREATE TRIGGER updateEntryStatus BEFORE UPDATE ON ENTRYSTATUS
  BEGIN
    UPDATE entrystatus SET current = 0 WHERE entryid = NEW.entryid AND NEW.current <> 0;
  END;
`

SqliteSchema is a string containing the SQL definition of the lexicon database

Variables ¶

View Source

// MariaDBSchema holds the SQL statements that define the lexicon database
// for MariaDB, one statement per element (tables and indexes; this list
// contains no triggers).
//
// The schema version is written to the SchemaVersion table as a quoted SQL
// string literal rather than a bare numeric literal, so that the stored
// text is exactly the SchemaVersion string and versions that are not valid
// numbers (e.g. "3.1.1") do not produce a syntax error.
var MariaDBSchema = []string{
	`CREATE TABLE SchemaVersion (name text not null);`,

	`INSERT INTO SchemaVersion VALUES ('` + SchemaVersion + `');`,

	`CREATE TABLE Lexicon (
	    name varchar(128) not null,
	    symbolSetName varchar(128) not null,
	    locale varchar(128) not null,
	    id integer not null primary key auto_increment
	  );`,
	`CREATE UNIQUE INDEX name ON Lexicon (name);`,
	`CREATE UNIQUE INDEX namesymset ON Lexicon (name, symbolSetName);`,

	`CREATE TABLE Lemma (
	    id integer not null primary key auto_increment,
	    reading varchar(128) not null,
	    paradigm varchar(128),
	    -- strn varchar(128) not null
	    strn text not null
	  );`,
	`CREATE INDEX reading on Lemma (reading);`,
	`CREATE INDEX paradigm on Lemma (paradigm);`,
	`CREATE INDEX strn on Lemma (strn(255));`,
	`CREATE INDEX lemidstrn on Lemma (id, strn(255));`,
	`-- TODO: NB: strn length is set to 128 since 255 as used elswhere is too
	-- long in this multi-column index.
	CREATE UNIQUE INDEX strnreading on Lemma (strn(128),reading);`,

	`-- The actual lexical entries live in this table.
	-- Each entry is linked to a single lexicon, and may have one or more
	-- phonetic transcriptions, found in their own table.
	CREATE TABLE Entry (
	    -- wordParts varchar(128),
	    id integer not null primary key auto_increment,
	    wordParts text,
	    label varchar(128), -- TODO What's this?!
	    language varchar(128) not null,
	    -- strn varchar(128) not null,
	    strn text not null,
	    lexiconId integer not null,
	    partOfSpeech varchar(128),
	    morphology varchar(128),
	    preferred integer not null default 0, -- TODO Why doesn't it work when changing integer -> boolean?
	    foreign key fk_3  (lexiconId) references Lexicon(id));`,

	`CREATE INDEX language on Entry (language);`,
	`CREATE INDEX strn on Entry (strn(255));`,
	`CREATE INDEX lexiconId ON Entry (lexiconId);`,
	`CREATE INDEX entrypref ON Entry (preferred);`,
	`CREATE INDEX strnlangue on Entry (strn(255),language);`,
	`CREATE INDEX estrnpref on Entry (strn(255),preferred);`,
	`CREATE INDEX idid on Entry (id, lexiconId);`,

	`-- Entry tag is a string used to distinguish between homographs.
	-- Unique for an entry of a specific word form, but not for different
	-- word forms. NOTE: This can be further normalized into a separate Tag
	-- table, for reusable tags.
	CREATE TABLE EntryTag (
	    -- id integer not null primary key auto_increment,
	    entryId integer not null,
	    tag text not null,
	    wordForm text, -- not null,
	    FOREIGN KEY fk_4 (entryId) REFERENCES Entry(id) ON DELETE CASCADE
	);`,
	`-- A single tag per entry
	CREATE UNIQUE INDEX tageid ON EntryTag(entryId);`,

	`-- TODO: NB: tag and wordForm length is set to 128 since 255 as used elswhere is too
	-- long in this multi-column index.
	CREATE UNIQUE INDEX tagentwf ON EntryTag(tag(128), wordForm(128));`,

	`CREATE TABLE EntryComment (
	    id integer not null primary key auto_increment,
	    entryId integer not null,
	    source text,
	    label text not null,
	    comment text, -- not null,
	    -- Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP not null,
	    FOREIGN KEY fk_5 (entryId) REFERENCES Entry(id) ON DELETE CASCADE
	);`,
	`CREATE INDEX cmtlabelndx ON EntryComment(label(255));`,
	`CREATE INDEX cmtsrcndx ON EntryComment(source(255));`,
	`-- Validiation results of entries
	CREATE TABLE EntryValidation (
	    id integer not null primary key auto_increment,
	    entryId integer not null,
	    level varchar(128) not null,
	    name varchar(128) not null,
	    -- message varchar(128) not null,
	    message text not null,
	    Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP not null,
	    foreign key fk_6 (entryId) references Entry(id) on delete cascade);`,
	`CREATE INDEX evallev ON EntryValidation(level);`,
	`CREATE INDEX evalnam ON EntryValidation(name);`,
	`CREATE INDEX entvalEid ON EntryValidation(entryId);`,
	`CREATE INDEX identvalEid ON EntryValidation(id,entryId);`,
	`-- Status of entries
	CREATE TABLE EntryStatus (
	    name varchar(128) not null,
	    source varchar(128) not null,
	    entryId integer not null,
	    Timestamp DATETIME DEFAULT CURRENT_TIMESTAMP not null,
	    current boolean default 1 not null,
	    id integer not null primary key auto_increment,
	    UNIQUE(entryId,id),
	    foreign key fk_7 (entryId) references Entry(id) on delete cascade);`,
	`CREATE INDEX esn ON EntryStatus (name);`,
	`CREATE INDEX ess ON EntryStatus (source);`,
	`CREATE INDEX esc ON EntryStatus (current);`,
	`CREATE INDEX esceid ON EntryStatus (entryId);`,
	`CREATE INDEX entryidcurrent ON EntryStatus (entryId, current);`,
	`CREATE UNIQUE INDEX eseii ON EntryStatus  (id, entryId);`,
	`CREATE UNIQUE INDEX eseiicurr ON EntryStatus  (id, entryId, current);`,
	`CREATE UNIQUE INDEX idcurr ON EntryStatus  (id, current);`,
	`CREATE TABLE Transcription (
	    entryId integer not null,
	    preference int,
	    label varchar(128),
	    -- symbolSetCode varchar(128) not null,
	    id integer not null primary key auto_increment,
	    language varchar(128) not null,
	    -- strn varchar(128) not null,
	    strn text not null,
	    sources TEXT not null,
	    foreign key fk_8 (entryId) references Entry(id) on delete cascade);`,

	`CREATE INDEX traeid ON Transcription (entryId);`,
	`CREATE INDEX idtraeid ON Transcription (id, entryId);`,

	`-- Linking table between a lemma form and its different surface forms
	CREATE TABLE Lemma2Entry (
	    entryId integer not null,
	    lemmaId integer not null,
	    unique(lemmaId,entryId),
	    -- unique(entryId, lemmaId),
	    FOREIGN KEY fk_1 (entryId) REFERENCES Entry(id) ON DELETE CASCADE,
	    FOREIGN KEY fk_2 (lemmaId) REFERENCES Lemma(id) ON DELETE CASCADE);`,

	`CREATE INDEX l2eind2 on Lemma2Entry (lemmaId);`,
	`CREATE UNIQUE INDEX l2euind on Lemma2Entry (lemmaId,entryId);`,
	`CREATE UNIQUE INDEX idx46cf073d on Lemma2Entry (entryId);`,
}

Functions ¶

func ImportMariaDBLexiconFile ¶ added in v0.4.1

func ImportMariaDBLexiconFile(db *sql.DB, lexiconName lex.LexName, logger Logger, lexiconFileName string, validator *validation.Validator) error

ImportMariaDBLexiconFile is intended for 'clean' imports. It doesn't check whether the words already exist and so on. It does not do any sanity checks whatsoever of the transcriptions before they are added. If the validator parameter is initialized, each entry will be validated before import, and the validation result will be added to the db.

func ImportSqliteLexiconFile ¶ added in v0.4.1

func ImportSqliteLexiconFile(db *sql.DB, lexiconName lex.LexName, logger Logger, lexiconFileName string, validator *validation.Validator) error

ImportSqliteLexiconFile is intended for 'clean' imports. It doesn't check whether the words already exist and so on. It does not do any sanity checks whatsoever of the transcriptions before they are added. If the validator parameter is initialized, each entry will be validated before import, and the validation result will be added to the db.

func RemoveEmptyStrings ¶

func RemoveEmptyStrings(ss []string) []string

RemoveEmptyStrings does that

func Sqlite3WithRegex ¶

func Sqlite3WithRegex()

Sqlite3WithRegex registers an Sqlite3 driver with regexp support. (Unfortunately quite slow regexp matching)

func ToLower ¶

func ToLower(ss []string) []string

ToLower lower-cases its input strings

func ValidateLexiconFile ¶

func ValidateLexiconFile(logger Logger, lexiconFileName string, validator *validation.Validator, printMode PrintMode) error

ValidateLexiconFile validates the input file and prints any validation errors to the specified logger.

Types ¶

type DBEngine ¶ added in v0.4.1

// DBEngine is an integer-valued enumeration of the database engines
// supported by this package.
type DBEngine int

// The constant values are assigned by iota, so they follow declaration order.
const (
	// Sqlite is the first DBEngine value (0).
	Sqlite DBEngine = iota

	// MariaDB is the second DBEngine value (1).
	MariaDB
)

func (DBEngine) String ¶ added in v0.4.1

func (i DBEngine) String() string

type DBIF ¶ added in v0.4.1

type DBIF interface {
	// contains filtered or unexported methods
}

DBIF is an interface that contains the methods that make up the db api. This interface is used to create db connections to different databases. NOTE: This interface is HUGE and may warrant refactoring. It was created to be able to add MariaDB support in addition to the original Sqlite (an afterthought).

type DBMQuery ¶

// DBMQuery bundles a set of lexicon references with the query to run.
type DBMQuery struct {
	// LexRefs lists the lexicon references carried by the query.
	LexRefs []lex.LexRef
	// Query is the search query itself.
	Query   Query
}

DBMQuery is a query used by the DBManager, containing lexicon references (db+lex name) and a dbapi.Query

type DBManager ¶

type DBManager struct {
	MaxOpenConns int
	// contains filtered or unexported fields
}

DBManager is used by external services (i.e., lexserver) to cache sql database instances along with their names

func NewDBManager ¶

func NewDBManager(engine DBEngine) (*DBManager, error)

NewDBManager creates a new DBManager instance with empty cache

func NewMariaDBManager ¶ added in v0.4.1

func NewMariaDBManager() *DBManager

NewMariaDBManager creates a new DBManager instance with empty cache

func NewSqliteDBManager ¶ added in v0.4.1

func NewSqliteDBManager() *DBManager

NewSqliteDBManager creates a new DBManager instance with empty cache

func (*DBManager) AddDB ¶

func (dbm *DBManager) AddDB(dbRef lex.DBRef, db *sql.DB) error

AddDB is used to add a database to the cached map of available databases. It does NOT create the database on disk. To create AND add the database, use DefineDB instead. To open and add an existing db, use OpenDB

func (*DBManager) CloseDB ¶

func (dbm *DBManager) CloseDB(dbRef lex.DBRef) error

CloseDB is used to close the specified database

func (*DBManager) ContainsDB ¶

func (dbm *DBManager) ContainsDB(dbRef lex.DBRef) bool

ContainsDB checks if the input database reference exists

func (*DBManager) DBExists ¶ added in v0.4.1

func (dbm *DBManager) DBExists(dbLocation string, dbRef lex.DBRef) (bool, error)

DBExists checks if a database exists. For Sqlite, it checks if the actual database file exists. For MariaDB, it checks if the database exists, and contains tables required for a lexicon database. The reason for this is how the user privileges work for MariaDB. See also DefineDB and DropDB.

func (*DBManager) DefineDB ¶ added in v0.4.1

func (dbm *DBManager) DefineDB(dbLocation string, dbRef lex.DBRef) error

DefineDB is used to define a new database and add it to the DB manager cache. For Sqlite, the database is created, for MariaDB, it has to be created beforehand by an administrator. In both cases, all required tables and triggers are added to the database.

func (*DBManager) DefineLexicon ¶

func (dbm *DBManager) DefineLexicon(lexRef lex.LexRef, symbolSetName string, locale string) error

DefineLexicon saves the name of a new lexicon to the db.

func (*DBManager) DefineLexicons ¶

func (dbm *DBManager) DefineLexicons(dbRef lex.DBRef, symbolSetName string, locale string, lexes ...lex.LexName) error

DefineLexicons saves the names of the new lexicons to the db.

func (*DBManager) DeleteEntry ¶ added in v0.4.1

func (dbm *DBManager) DeleteEntry(entryID int64, lexRef lex.LexRef) (int64, error)

DeleteEntry deletes an entry from the database

func (*DBManager) DeleteLexicon ¶

func (dbm *DBManager) DeleteLexicon(lexRef lex.LexRef) error

DeleteLexicon deletes the lexicon from the associated lexicon database. Returns an error if the lexicon doesn't exist, or if the lexicon is not empty.

func (*DBManager) DropDB ¶ added in v0.4.1

func (dbm *DBManager) DropDB(dbLocation string, dbRef lex.DBRef) error

DropDB drops the database (cannot be undone). For Sqlite, the database is entirely dropped; for MariaDB, all database tables are dropped, but the database is not deleted. Deletion of MariaDB databases should be done by a server administrator.

func (DBManager) Engine ¶ added in v0.4.1

func (dbm DBManager) Engine() DBEngine

func (*DBManager) EntryCount ¶

func (dbm *DBManager) EntryCount(lexRef lex.LexRef) (int64, error)

EntryCount counts the number of entries in a lexicon

func (*DBManager) FirstTimePopulateDBCache ¶ added in v0.4.1

func (dbm *DBManager) FirstTimePopulateDBCache(dbLocation string) error

FirstTimePopulateDBCache reads all available dbs into the database cache

func (*DBManager) GetLexicon ¶

func (dbm *DBManager) GetLexicon(lexRef lex.LexRef) (lex.LexRefWithInfo, error)

GetLexicon returns information (LexRefWithInfo) matching a lexicon name in the db. Returns an error if there is no such lexicon name in the db

func (*DBManager) GetSchemaVersion ¶ added in v0.4.1

func (dbm *DBManager) GetSchemaVersion(dbRef lex.DBRef) (string, error)

GetSchemaVersion retrieves the schema version from the database

func (*DBManager) ImportLexiconFile ¶

func (dbm *DBManager) ImportLexiconFile(lexRef lex.LexRef, logger Logger, lexiconFileName string, validator *validation.Validator) error

ImportLexiconFile is intended for 'clean' imports. It doesn't check whether the words already exist and so on. It does not do any sanity checks whatsoever of the transcriptions before they are added. If the validator parameter is initialized, each entry will be validated before import, and the validation result will be added to the db.

func (*DBManager) InsertEntries ¶

func (dbm *DBManager) InsertEntries(lexRef lex.LexRef, entries []lex.Entry) ([]int64, error)

InsertEntries saves a list of Entries and associates them to the lexicon

func (*DBManager) LexiconExists ¶

func (dbm *DBManager) LexiconExists(lexRef lex.LexRef) (bool, error)

LexiconExists is used to check if the specified lexicon exists in the specified database

func (*DBManager) LexiconStats ¶

func (dbm *DBManager) LexiconStats(lexRef lex.LexRef) (LexStats, error)

LexiconStats calls the specified database a number of times, gathering different numbers, e.g. on how many entries there are in a lexicon.

func (*DBManager) ListAllEntryStatuses ¶

func (dbm *DBManager) ListAllEntryStatuses(lexRef lex.LexRef) ([]string, error)

ListAllEntryStatuses returns a list of the names of all EntryStatuses, including those that are not 'current' (i.e., the most recent status). In other words, this list potentially includes statuses not in use, but that have been used before.

func (*DBManager) ListCommentLabels ¶ added in v0.4.1

func (dbm *DBManager) ListCommentLabels(lexRef lex.LexRef) ([]string, error)

ListCommentLabels returns a list of all comment labels

func (*DBManager) ListCurrentEntryStatuses ¶

func (dbm *DBManager) ListCurrentEntryStatuses(lexRef lex.LexRef) ([]string, error)

ListCurrentEntryStatuses returns a list of the names of all EntryStatuses marked 'current' (i.e., the most recent status).

func (*DBManager) ListCurrentEntryStatusesWithFreq ¶ added in v0.4.1

func (dbm *DBManager) ListCurrentEntryStatusesWithFreq(lexRef lex.LexRef) (map[string]int, error)

ListCurrentEntryStatusesWithFreq returns a map of the names of all EntryStatuses marked 'current' (i.e., the most recent status), and the frequency for each status.

func (*DBManager) ListCurrentEntryUsers ¶ added in v0.4.1

func (dbm *DBManager) ListCurrentEntryUsers(lexRef lex.LexRef) ([]string, error)

ListCurrentEntryUsers returns a list of the names of all EntryUsers marked 'current' (i.e., the most recent status).

func (*DBManager) ListCurrentEntryUsersWithFreq ¶ added in v0.4.1

func (dbm *DBManager) ListCurrentEntryUsersWithFreq(lexRef lex.LexRef) (map[string]int, error)

ListCurrentEntryUsersWithFreq returns a map of the names of all EntryUsers marked 'current' (i.e., the most recent status), and the frequency for each user

func (*DBManager) ListDBNames ¶

func (dbm *DBManager) ListDBNames() ([]lex.DBRef, error)

ListDBNames lists all database names in the cached map of available databases. It does NOT verify what databases are actually existing on disk.

func (*DBManager) ListIDs ¶ added in v0.4.1

func (dbm *DBManager) ListIDs(lexRef lex.LexRef) ([]int64, error)

ListIDs is a wrapper around lookUpIds, returning a slice of ID's

func (*DBManager) ListLexicons ¶

func (dbm *DBManager) ListLexicons() ([]lex.LexRefWithInfo, error)

ListLexicons returns a list of defined lexicons, including database name, lexicon name, and symbol set name

func (*DBManager) Locale ¶ added in v0.4.1

func (dbm *DBManager) Locale(lexRef lex.LexRef) (string, error)

Locale looks up the locale for a specific lexicon

func (DBManager) Lock ¶ added in v0.4.1

func (dbm DBManager) Lock()

func (*DBManager) LookUp ¶

func (dbm *DBManager) LookUp(q DBMQuery, out lex.EntryWriter) error

LookUp takes a DBMQuery, searches the specified lexicon for the included search query. The result is written to a lex.EntryWriter.

func (*DBManager) LookUpIntoMap ¶

func (dbm *DBManager) LookUpIntoMap(q DBMQuery) (map[lex.DBRef][]lex.Entry, error)

LookUpIntoMap is a wrapper around LookUp, returning a map of Entries

func (*DBManager) LookUpIntoSlice ¶

func (dbm *DBManager) LookUpIntoSlice(q DBMQuery) ([]lex.Entry, error)

LookUpIntoSlice is a wrapper around LookUp, returning a slice of Entries

func (*DBManager) MoveNewEntries ¶

func (dbm *DBManager) MoveNewEntries(dbRef lex.DBRef, fromLex, toLex lex.LexName, newSource, newStatus string) (MoveResult, error)

MoveNewEntries moves lexical entries from the lexicon named fromLex to the lexicon named toLex. The 'newSource' string is the name of the new source of the entries to be moved, and 'newStatus' is the name of the new status to set on the moved entries. Currently, neither source nor status may be the empty string. TODO: Maybe it should be possible to skip source and status values?

Only "new" entries are moved, i.e., entries with lex.Entry.Strn values found in fromLex but *not* found in toLex. The rationale behind this function is to first create a small additional lexicon with new entries (fromLex), which can later be appended to the master lexicon (toLex).

func (*DBManager) OpenDB ¶ added in v0.4.1

func (dbm *DBManager) OpenDB(dbLocation string, dbRef lex.DBRef) error

OpenDB is used to open an existing database and add it to the DB manager cache.

func (DBManager) RLock ¶ added in v0.4.1

func (dbm DBManager) RLock()

func (DBManager) RUnlock ¶ added in v0.4.1

func (dbm DBManager) RUnlock()

func (*DBManager) RemoveDB ¶

func (dbm *DBManager) RemoveDB(dbRef lex.DBRef) error

RemoveDB is used to remove a database from the cached map of available databases. It does NOT remove the database from disk.

func (DBManager) Unlock ¶ added in v0.4.1

func (dbm DBManager) Unlock()

func (*DBManager) UpdateEntry ¶

func (dbm *DBManager) UpdateEntry(e lex.Entry) (lex.Entry, bool, error)

UpdateEntry wraps a call to UpdateEntryTx in a transaction, and returns the updated entry, fresh from the db.

func (*DBManager) UpdateValidation ¶ added in v0.4.1

func (dbm *DBManager) UpdateValidation(e lex.Entry) error

UpdateValidation updates the validation using the cached validation in the specified lex.Entry.

func (*DBManager) Validate ¶

func (dbm *DBManager) Validate(lexRef lex.LexRef, logger Logger, vd validation.Validator, q Query) (ValStats, error)

Validate validates all entries matching the specified lexRef and search query. It updates the validation stats in the db and returns them.

func (*DBManager) ValidationStats ¶

func (dbm *DBManager) ValidationStats(lexRef lex.LexRef) (ValStats, error)

ValidationStats returns existing validation stats for the specified lexRef

type LatestUpdatesPerSource ¶ added in v0.4.1

type LatestUpdatesPerSource struct {
	Sources map[string]string `json:"sources"` // source name => timestamp
}

LatestUpdatesPerSource holds the latest status timestamp per source

type LexStats ¶

type LexStats struct {
	Lexicon string `json:"lexicon"`
	// The number of entries in the lexicon corresponding to database id LexiconID
	Entries int64 `json:"entries"`

	StatusFrequencies []StatusFreq `json:"statusFrequencies"`

	ValStats               ValStats
	LatestUpdatesPerSource LatestUpdatesPerSource
}

LexStats holds the result of a call to the dbapi.LexiconStats function.

type Logger ¶

type Logger interface {
	Progress(string)
	Write(string)
	LogInterval() int
}

Logger is an interface for logging progress and other messages

type MoveResult ¶

type MoveResult struct {
	N int64
}

MoveResult is returned from the MoveNewEntries function. TODO Since it only contains a single int64, this struct is probably not needed. Only useful if more info is to be returned.

type PrintMode ¶

type PrintMode int

PrintMode specifies the type of output to print (all/valid/invalid)

const (
	// PrintAll prints all entries, valid or invalid
	PrintAll PrintMode = iota

	// PrintValid prints valid entries only
	PrintValid

	// PrintInvalid prints invalid entries only
	PrintInvalid
)

type Query ¶

type Query struct {
	// list of words to get corresponding entries for
	Words []string `json:"words"`
	// a 'like' db search expression matching words
	WordLike   string `json:"wordLike"`
	WordRegexp string `json:"wordRegexp"`

	WordParts       []string `json:"wordParts"`
	WordPartsLike   string   `json:"wordPartsLike"`
	WordPartsRegexp string   `json:"wordPartsRegexp"`

	// a slice of Entry.IDs to search for
	EntryIDs []int64 `json:"entryIds"`
	// a 'like' db search expression matching transcriptions
	TranscriptionLike   string `json:"transcriptionLike"`
	TranscriptionRegexp string `json:"transcriptionRegexp"`
	// a 'like' db search expression matching part of speech strings
	PartOfSpeechLike   string `json:"partOfSpeechLike"`
	PartOfSpeechRegexp string `json:"partOfSpeechRegexp"`

	MorphologyLike string `json:"morphologyLike"`

	// list of lemma forms to get corresponding entries for
	Lemmas []string `json:"lemmas"`
	// an SQL 'like' expression to match lemma forms
	LemmaLike   string `json:"lemmaLike"`
	LemmaRegexp string `json:"lemmaRegexp"`
	// an SQL 'like' expression to match lemma readings
	ReadingLike   string `json:"readingLike"`
	ReadingRegexp string `json:"readingRegexp"`
	// an SQL 'like' expression to match lemma paradigms
	ParadigmLike   string `json:"paradigmLike"`
	ParadigmRegexp string `json:"paradigmRegexp"`

	TagLike      string `json:"tagLike"`
	LanguageLike string `json:"languageLike"`

	CommentLabelLike  string `json:"commentLabelLike"`
	CommentSourceLike string `json:"commenSourceLike"`
	CommentLike       string `json:"commentLike"`

	// A list of entry statuses to match
	EntryStatus []string `json:"entryStatus"`

	// A list of users to match
	Users []string `json:"user"`

	// Select entries with one or more EntryValidations
	HasEntryValidation  bool   `json:"hasEntryValidation"`
	ValidationRuleLike  string `json:"validationRuleLike"`
	ValidationLevelLike string `json:"validationLevelLike"`

	MultipleTags bool `json:"multipleTags"`

	// the page returned by the SQL query's 'LIMIT' (starts at 1)
	Page int64 `json:"page"`
	// the page length of the SQL query's 'LIMIT'
	PageLength int64 `json:"pageLength"`
}

Query represents an SQL search query to the lexicon database. TODO Change to list(s) of search criteria. TODO add boolean for include/exclude (i.e., "NOT" in the generated SQL).

func NewQuery ¶

func NewQuery() Query

NewQuery returns a Query instance where PageLength: 0

func (Query) Empty ¶

func (q Query) Empty() bool

Empty returns true if there are no search criteria values. This is no longer a sane way to do it, since the number of search criteria has grown.

type QueryStats ¶

type QueryStats struct {
	Query   Query `json:"query"`
	Entries int64 `json:"entries"`
}

QueryStats holds the result of a call to the dbapi.LexiconStats function. TODO add fields for additional stats

type SilentLogger ¶

type SilentLogger struct {
}

SilentLogger is a muted logger, used in testing to avoid excessive and confusing test output.

func (SilentLogger) LogInterval ¶

func (l SilentLogger) LogInterval() int

LogInterval specifies the logging interval (to be used by the calling process)

func (SilentLogger) Progress ¶

func (l SilentLogger) Progress(s string)

Progress logs progress info

func (SilentLogger) Write ¶

func (l SilentLogger) Write(s string)

Write logs a message string

type StatusFreq ¶ added in v0.4.1

type StatusFreq struct {
	Status string `json:"status"`
	Freq   int64  `json:"freq"`
}

type StderrLogger ¶

type StderrLogger struct {
	LogIntervalVar int
}

StderrLogger is a logger for printing messages to standard error. Implements the dbapi.Logger interface.

func (StderrLogger) LogInterval ¶

func (l StderrLogger) LogInterval() int

LogInterval specifies the logging interval (to be used by the calling process)

func (StderrLogger) Progress ¶

func (l StderrLogger) Progress(s string)

Progress logs progress info

func (StderrLogger) Write ¶

func (l StderrLogger) Write(s string)

Write logs a message string

type StdoutLogger ¶

type StdoutLogger struct {
	LogIntervalVar int
}

StdoutLogger is a logger for printing messages to standard out. Implements the dbapi.Logger interface.

func (StdoutLogger) LogInterval ¶

func (l StdoutLogger) LogInterval() int

LogInterval specifies the logging interval (to be used by the calling process)

func (StdoutLogger) Progress ¶

func (l StdoutLogger) Progress(s string)

Progress logs progress info

func (StdoutLogger) Write ¶

func (l StdoutLogger) Write(s string)

Write logs a message string

type Symbol ¶

type Symbol struct {
	LexiconID   int64  `json:"lexiconId"`
	Symbol      string `json:"symbol"`
	Category    string `json:"category"`
	Description string `json:"description"`
	IPA         string `json:"ipa"`
}

Symbol corresponds to the symbol db table, and holds a phonetic symbol

type ValStats ¶

type ValStats struct {
	// TotalEntries is the total entries to be validated
	TotalEntries int

	// ValidatedEntries is the total validated entries so far
	ValidatedEntries int

	// TotalValidations is the total number of validation messages so far
	TotalValidations int

	// InvalidEntries is the number of invalid entries so far
	InvalidEntries int

	Levels map[string]int `json:"levels"`
	Rules  map[string]int `json:"rules"`
}

ValStats is used to incrementally give statistics during a validation process, or just to represent the final validation statistics.

type WebSockLogger ¶

type WebSockLogger struct {
	LogIntervalVar int
	// contains filtered or unexported fields
}

WebSockLogger is a logger for printing messages to a web socket. Implements the dbapi.Logger interface.

func NewWebSockLogger ¶

func NewWebSockLogger(websock *websocket.Conn) WebSockLogger

NewWebSockLogger creates a new websock logger using the input connection

func (WebSockLogger) LogInterval ¶

func (l WebSockLogger) LogInterval() int

LogInterval specifies the logging interval (to be used by the calling process)

func (WebSockLogger) Progress ¶

func (l WebSockLogger) Progress(msg string)

Progress logs progress info

func (WebSockLogger) Write ¶

func (l WebSockLogger) Write(msg string)

Write logs a message string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL