repo

package
v0.0.0-...-b29f18b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 4, 2024 License: Apache-2.0 Imports: 19 Imported by: 0

README

Repo

This package aggregates queries and stores them into a PG database.

Helpful DB Queries

Show all queries by database ordered by the most queried.

select d.name as db_name, sum(total_count) as tc, sum(total_duration) as td
from databases d join queries q ON d.uid = q.database_uid
group by d.uid
order by tc desc;

Show all queries by query ordered by the most queries in a specific database.

select masked_query, sum(total_count) as tc, sum(total_duration) as td
from databases d join queries q ON d.uid = q.database_uid
where d.name = 'datname'
group by q.uid
order by tc desc;

Show totals per command tag:

select
    command,
    count(1) as uniq_queries,
    sum(total_count) as total_count,
    trunc(sum(total_duration_us)/1000000, 3) as total_duration_sec,
    trunc((sum(total_duration_us)/sum(total_count))/1000000,3) as average_duration_sec
from queries
where command in ('ANALYZE', 'COMMIT', 'CREATE', 'DELETE', 'DROP', 'INSERT', 'ROLLBACK', 'SELECT', 'SET', 'UPDATE', 'WITH')
group by command
order by total_duration_sec desc;

Show creates by object type:

select cs.object_type,
       count(1) as unique_objects,
       sum(total_count) as total_count,
       sum(total_duration_us) as total_duration_us
from queries q
join create_statements_in_queries csq on q.uid = csq.query_uid
join create_statements cs on csq.create_statement_uid = cs.uid
group by cs.object_type
order by cs.object_type;

Show temp tables and indexes by name, removing numbers because a lot of people add a random number to the name:

select cs.object_type, regexp_replace(cs.name, '[0-9]', '', 'g'),
       count(1) as unique_objects,
       sum(total_count) as total_count,
       sum(total_duration_us) as total_duration_us
from queries q
join create_statements_in_queries csq on q.uid = csq.query_uid
join create_statements cs on csq.create_statement_uid = cs.uid
group by cs.object_type, regexp_replace(cs.name, '[0-9]', '', 'g')
order by total_count desc;

Show temp tables and indexes - per unique query:

select cs.object_type, cs.name, q.masked_query, total_count, total_duration_us
from queries q
join create_statements_in_queries csq on q.uid = csq.query_uid
join create_statements cs on csq.create_statement_uid = cs.uid
order by
    -- total_duration_us desc,
    total_count desc;

Show queries by username:

select user_name, count(1) as uniq_queries, sum(total_count) as total_count, trunc(sum(total_duration_us)/1000000, 3) as total_duration_sec
from query_users group by user_name order by total_count desc

Show slowest queries for a specific username. Note that we're pulling the totals from the query_users table since multiple users can run the same query:

select q.masked_query, qu.total_count, trunc(qu.total_duration_us::decimal/1000000,3) as total_duration_sec, trunc(q.average_duration_us::decimal/1000000,3) as average_duration_sec
from queries q
join query_users qu on q.uid = qu.query_uid where qu.user_name = 'queue_executor' order by q.total_duration_us desc;

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	Env        = "dev"
	DBHost     = "localhost"
	DBPort     = "5432"
	DBUser     = "postgres"
	DBPassword = ""
	DBName     = "lantern"
)

Default config values. These can be overwritten by the params passed in.

View Source
var UuidNamespace = uuid.MustParse("018e1b50-ee98-73f2-9839-420223323163")

UuidNamespace is the namespace for the uuid. There are 4 predefined namespaces, but you can also create your own.

Functions

func Conn

func Conn() *sql.DB

Conn returns a connection to the database.

func DisplayConfig

func DisplayConfig()

func ExecuteQuery

func ExecuteQuery(db *sql.DB, sql string)

func HasErr

func HasErr(msg string, err error) bool

func MarshalJSON

func MarshalJSON(data interface{}) string

func OverrideConfig

func OverrideConfig(strArgs map[string]*string, intArgs map[string]*int, boolArgs map[string]*bool)

func Ping

func Ping(db *sql.DB)

func SetDatabaseUID

func SetDatabaseUID(name string) uuid.UUID

func ShaQuery

func ShaQuery(query string) string

ShaQuery creates a sha of the query

func UnmarshalJSON

func UnmarshalJSON(data []byte, v interface{})

func UuidFromString

func UuidFromString(uid string) uuid.UUID

func UuidString

func UuidString(query string) string

func UuidV5

func UuidV5(str string) uuid.UUID

Types

type CreateStatement

type CreateStatement struct {
	UID              uuid.UUID `json:"uid,omitempty"`
	Scope            string    `json:"scope,omitempty"`
	IsUnique         bool      `json:"is_unique,omitempty"`
	UsedConcurrently bool      `json:"used_concurrently,omitempty"`
	IsTemp           bool      `json:"is_temp,omitempty"`
	IsUnlogged       bool      `json:"is_unlogged,omitempty"`
	ObjectType       string    `json:"object_type,omitempty"`
	IfNotExists      bool      `json:"if_not_exists,omitempty"`
	Name             string    `json:"name,omitempty"`
	OnCommit         string    `json:"on_commit,omitempty"`
	Operator         string    `json:"operator,omitempty"`
	Expression       string    `json:"expression,omitempty"`
	WhereClause      string    `json:"where_clause,omitempty"`
}

func (*CreateStatement) SetUID

func (c *CreateStatement) SetUID()

type CreateStatements

type CreateStatements struct {
	CreateStatements map[string]*CreateStatement `json:"create_statements,omitempty"`
}

func NewCreateStatements

func NewCreateStatements() *CreateStatements

func (*CreateStatements) Add

func (*CreateStatements) CountInDB

func (c *CreateStatements) CountInDB(db *sql.DB) int64

func (*CreateStatements) SetAll

func (c *CreateStatements) SetAll(db *sql.DB)

type CreateStatementsInQueries

type CreateStatementsInQueries struct {
	UID                uuid.UUID `json:"uid,omitempty"`
	CreateStatementUID uuid.UUID `json:"create_statement_uid,omitempty"`
	QueryUID           uuid.UUID `json:"query_uid,omitempty"`
}

type Database

type Database struct {
	UID      uuid.UUID `json:"uid,omitempty"`      // unique UUID of the database
	Name     string    `json:"name,omitempty"`     // the name of the database
	Template string    `json:"template,omitempty"` // the template of the database
}

type Databases

type Databases struct {
	Source    string               `json:"source,omitempty"`
	Databases map[string]*Database `json:"databases,omitempty"` // the key is the UUIDv5 sha of the database
}

func NewDatabases

func NewDatabases(source string) *Databases

func (*Databases) AddDatabase

func (d *Databases) AddDatabase(database, template string) *Database

func (*Databases) CountInDB

func (d *Databases) CountInDB() int

func (*Databases) Upsert

func (d *Databases) Upsert(db *sql.DB)

type Owner

type Owner struct {
	UID  uuid.UUID `json:"uid,omitempty"`  // unique UUID of the owner
	Name string    `json:"name,omitempty"` // the name of the owner
}

func (*Owner) SetUID

func (o *Owner) SetUID()

type Owners

type Owners struct {
	Owners map[string]*Owner `json:"owners,omitempty"`
}

func NewOwners

func NewOwners() *Owners

func (*Owners) Add

func (o *Owners) Add(name string) *Owner

func (*Owners) CountInDB

func (o *Owners) CountInDB(db *sql.DB) int

func (*Owners) Upsert

func (o *Owners) Upsert(db *sql.DB)

type OwnersImport

type OwnersImport struct {
	Table  string   `json:"table,omitempty"`
	Owners []string `json:"owners,omitempty"`
}

type OwnersImports

type OwnersImports struct {
	Data     []OwnersImport `json:"data,omitempty"`
	Database string         `json:"database,omitempty"`
}

func NewOwnersImports

func NewOwnersImports() *OwnersImports

func (*OwnersImports) Import

func (o *OwnersImports) Import(data []byte) error

func (*OwnersImports) Unmarshal

func (o *OwnersImports) Unmarshal(data []byte) error

type OwnersTables

type OwnersTables struct {
	UID      uuid.UUID `json:"uid,omitempty"`       // unique UUID of the row
	OwnerUID uuid.UUID `json:"owner_uid,omitempty"` // the UUID of the owner
	TableUID uuid.UUID `json:"table_uid,omitempty"` // the UUID of the table
}

func (*OwnersTables) SetUID

func (o *OwnersTables) SetUID()

type OwnersTablesCollection

type OwnersTablesCollection struct {
	OwnersTables map[string]*OwnersTables `json:"owners_tables,omitempty"`
}

func NewOwnersTablesCollection

func NewOwnersTablesCollection() *OwnersTablesCollection

NewOwnersTablesCollection creates a new OwnersTablesCollection struct

func (*OwnersTablesCollection) Add

Add adds a new OwnersTables to the collection

func (*OwnersTablesCollection) CountInDB

func (o *OwnersTablesCollection) CountInDB(db *sql.DB) int64

CountInDB returns the number of rows in the owners_tables table

func (*OwnersTablesCollection) Upsert

func (o *OwnersTablesCollection) Upsert(db *sql.DB)

Upsert inserts or updates the rows in the owners_tables table

type ProcessedFile

type ProcessedFile struct {
	UID         uuid.UUID `json:"uid,omitempty"`
	FileName    string    `json:"file_name,omitempty"`
	ProcessedAt time.Time `json:"processed_at,omitempty"`
}

func NewProcessedFile

func NewProcessedFile(fileName string) *ProcessedFile

func (*ProcessedFile) HasBeenProcessed

func (p *ProcessedFile) HasBeenProcessed(db *sql.DB) bool

func (*ProcessedFile) Processed

func (p *ProcessedFile) Processed(db *sql.DB)

type Queries

type Queries struct {
	Source                    string                                    `json:"source,omitempty"`
	Queries                   map[string]*Query                         `json:"queries,omitempty"`
	FunctionsInQueries        map[string]*extractor.FunctionsInQueries  `json:"functions_in_queries,omitempty"`
	ColumnsInQueries          map[string]*extractor.ColumnsInQueries    `json:"columns_in_queries,omitempty"`
	TablesInQueries           map[string]*extractor.TablesInQueries     `json:"tables_in_queries,omitempty"`
	TableJoinsInQueries       map[string]*extractor.TableJoinsInQueries `json:"table_joins_in_queries,omitempty"`
	Tables                    map[string]*extractor.Tables              `json:"tables,omitempty"`
	CreateStatementsInQueries map[string]*CreateStatementsInQueries     `json:"create_statements_in_queries,omitempty"`
	CreateStatements          map[string]*CreateStatement               `json:"create_statements,omitempty"`

	Errors map[string]int `json:"errors,omitempty"`
}

func NewQueries

func NewQueries(source string) *Queries

NewQueries creates a new Queries struct

func (*Queries) Analyze

func (q *Queries) Analyze(w QueryWorker) bool

Analyze processes a query and returns a bool whether or not the query was parsed successfully This ends up calling addQuery which adds the query to the Queries struct Then the Queries struct is cached as a JSON file

func (*Queries) CountInDB

func (q *Queries) CountInDB() int

func (*Queries) ExtractStats

func (q *Queries) ExtractStats()

func (*Queries) LogAggregateOfErrors

func (q *Queries) LogAggregateOfErrors()

func (*Queries) Process

func (q *Queries) Process() bool

func (*Queries) Upsert

func (q *Queries) Upsert()

func (*Queries) UpsertColumnsInQueries

func (q *Queries) UpsertColumnsInQueries()

func (*Queries) UpsertCreateStatements

func (q *Queries) UpsertCreateStatements()

func (*Queries) UpsertCreateStatementsInQueries

func (q *Queries) UpsertCreateStatementsInQueries()

func (*Queries) UpsertQueryByHours

func (q *Queries) UpsertQueryByHours()

func (*Queries) UpsertQueryUsers

func (q *Queries) UpsertQueryUsers()

func (*Queries) UpsertTableJoinsInQueries

func (q *Queries) UpsertTableJoinsInQueries()

func (*Queries) UpsertTables

func (q *Queries) UpsertTables()

func (*Queries) UpsertTablesInQueries

func (q *Queries) UpsertTablesInQueries()

type Query

type Query struct {
	UID           uuid.UUID               `json:"uid,omitempty"`            // unique sha of the query
	DatabaseUID   uuid.UUID               `json:"database_uid,omitempty"`   // the dataset the query belongs to
	SourceUID     uuid.UUID               `json:"source_uid,omitempty"`     // the source the query belongs to
	QueryByHours  map[string]*QueryByHour `json:"query_by_hours,omitempty"` // query stats per hour
	Command       token.TokenType         `json:"command,omitempty"`        // the type of query
	MaskedQuery   string                  `json:"masked_query,omitempty"`   // the query with parameters masked
	UnmaskedQuery string                  `json:"unmasked_query,omitempty"` // the query with parameters unmasked
	SourceQuery   string                  `json:"source,omitempty"`         // the original query from the source

}

func (*Query) MarshalJSON

func (q *Query) MarshalJSON() ([]byte, error)

func (*Query) Process

func (q *Query) Process(w QueryWorker, qs *Queries) bool

Process processes a query and returns a bool whether or not the query was parsed successfully

func (*Query) UnmarshalJSON

func (q *Query) UnmarshalJSON(data []byte) error

type QueryByHour

type QueryByHour struct {
	UID                       uuid.UUID             `json:"uid,omitempty"`                          // unique sha of the query plus the time
	QueryUID                  uuid.UUID             `json:"query_uid,omitempty"`                    // unique sha of the query
	QueriedDate               string                `json:"queried_date,omitempty"`                 // the date the query was executed
	QueriedHour               int                   `json:"queried_hour,omitempty"`                 // the hour the query was executed
	TotalCount                int64                 `json:"total_count,omitempty"`                  // the number of times the query was executed
	TotalDurationUs           int64                 `json:"total_duration_us,omitempty"`            // the total duration of all executions of the query in microseconds
	TotalQueriesInTransaction int64                 `json:"total_queries_in_transaction,omitempty"` // the sum total number of queries each time this query was executed in a transaction
	Users                     map[string]*QueryUser `json:"users,omitempty"`                        // the users who executed the query
}

type QueryUser

type QueryUser struct {
	UID              uuid.UUID `json:"uid,omitempty"`
	QueriesByHourUID uuid.UUID `json:"queries_by_hour,omitempty"`
	UserName         string    `json:"user_name,omitempty"`
	TotalCount       int64     `json:"total_count,omitempty"`
	TotalDurationUs  int64     `json:"total_duration_us,omitempty"`
}

type QueryWorker

type QueryWorker struct {
	TimestampByHour       time.Time
	Databases             *Databases
	Source                *Source
	SourceUID             uuid.UUID
	Database              string
	DatabaseUID           uuid.UUID
	UserName              string
	Input                 string // Original query. This may contain many queries
	TransactionQueryCount int64  // Number of queries in a transaction
	DurationUs            int64  // Duration of the query in microseconds
	MustExtract           bool
	Command               token.TokenType
	Masked                string // Masked query. This is the query with all values replaced with ?
	Unmasked              string // Unmasked query. This is the query with all values left alone
}

This is an interim struct with additional Query meta data. The struct is passed around and built up instead of passing around a ton of individual variables. This is used both initially when compiling a list of queries and then individually when processing each query

type Source

type Source struct {
	UID  uuid.UUID `json:"uid,omitempty"`  // unique UUID of the source
	Name string    `json:"name,omitempty"` // the name of the source
	URL  string    `json:"url,omitempty"`  // the url of the source
}

func NewSource

func NewSource(name, url string) *Source

type Sources

type Sources struct {
	Sources map[string]*Source `json:"sources,omitempty"` // the key is the sha of the database
}

func NewSources

func NewSources() *Sources

func (*Sources) Add

func (d *Sources) Add(name, url string) *Source

func (*Sources) CountInDB

func (d *Sources) CountInDB() int

func (*Sources) Upsert

func (d *Sources) Upsert()

type Table

type Table struct {
	UID               uuid.UUID `json:"uid,omitempty"`                 // unique UUID of the table
	DatabaseUID       uuid.UUID `json:"database_uid,omitempty"`        // the UUID of the database -- TODO: make this a many to many
	Schema            string    `json:"schema_name,omitempty"`         // the schema of the table
	Name              string    `json:"table_name,omitempty"`          // the name of the table
	Description       string    `json:"table_description,omitempty"`   // a description of the table
	EstimatedRowCount int64     `json:"estimated_row_count,omitempty"` // the estimated number of rows in the table
	ColumnCount       int64     `json:"column_count,omitempty"`        // the number of columns in the table
	IndexCount        int64     `json:"index_count,omitempty"`         // the number of indexes on the table
	IndexSizeBytes    int64     `json:"index_size_bytes,omitempty"`    // the size of the indexes on the table
	DataSizeBytes     int64     `json:"data_size_bytes,omitempty"`     // the size of the data in the table
	TableType         string    `json:"table_type,omitempty"`          // the type of table (e.g. view, table, materialized view)
	CreatedAt         time.Time `json:"created_at,omitempty"`          // when the table was created
	UpdatedAt         time.Time `json:"updated_at,omitempty"`          // when the table was last updated
}

func (*Table) MarshalJSON

func (t *Table) MarshalJSON() ([]byte, error)

func (*Table) SetUID

func (t *Table) SetUID()

func (*Table) UnmarshalJSON

func (t *Table) UnmarshalJSON(data []byte) error

type Tables

type Tables struct {
	Tables map[string]*Table `json:"tables,omitempty"`
}

func NewTables

func NewTables() *Tables

func (*Tables) Add

func (t *Tables) Add(tab *Table) *Table

func (*Tables) CountInDB

func (t *Tables) CountInDB(db *sql.DB) int64

func (*Tables) SetAll

func (t *Tables) SetAll(db *sql.DB)

func (*Tables) Upsert

func (t *Tables) Upsert(db *sql.DB)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL