pfsdb

package
v2.8.0-nightly.20231004 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 3, 2023 License: Apache-2.0 Imports: 21 Imported by: 0

Documentation

Overview

Package pfsdb contains the database schema that PFS uses.

Index

Constants

View Source
const (
	BranchColumnID        = branchColumn("branch.id")
	BranchColumnCreatedAt = branchColumn("branch.created_at")
	BranchColumnUpdatedAt = branchColumn("branch.updated_at")
)
View Source
const (
	SortOrderNone = sortOrder("")
	SortOrderAsc  = sortOrder("ASC")
	SortOrderDesc = sortOrder("DESC")
)
View Source
const (
	// CommitsChannelName is used to watch events for the commits table.
	CommitsChannelName = "pfs_commits"
)
View Source
const (
	// ReposChannelName is used to watch events for the repos table.
	ReposChannelName = "pfs_repos"
)

Variables

View Source
var (
	CommitSetIDs   = CommitFields("commit_set_id")
	CommitOrigins  = CommitFields("origin")
	CommitRepos    = CommitFields("repo_id")
	CommitBranches = CommitFields("branch_id")
	CommitProjects = CommitFields("project_id")
)
View Source
var (
	RepoTypes    = RepoFields("type")
	RepoProjects = RepoFields("project_id")
	RepoNames    = RepoFields("name")
)
View Source
var BranchesRepoIndex = &col.Index{
	Name: "repo",
	Extract: func(val proto.Message) string {
		return RepoKey(val.(*pfs.BranchInfo).Branch.Repo)
	},
}
View Source
var CommitsBranchlessIndex = &col.Index{
	Name: "branchless",
	Extract: func(val proto.Message) string {
		return CommitKey(val.(*pfs.CommitInfo).Commit)
	},
}
View Source
var CommitsCommitSetIndex = &col.Index{
	Name: "commitset",
	Extract: func(val proto.Message) string {
		return val.(*pfs.CommitInfo).Commit.Id
	},
}
View Source
var CommitsRepoIndex = &col.Index{
	Name: "repo",
	Extract: func(val proto.Message) string {
		return RepoKey(val.(*pfs.CommitInfo).Commit.Repo)
	},
}

Functions

func AddCommit

func AddCommit(tx *pachsql.Tx, commit *pfs.Commit) error

func AddCommitProvenance

func AddCommitProvenance(tx *pachsql.Tx, from, to *pfs.Commit) error

func BranchKey

func BranchKey(branch *pfs.Branch) string

func Branches

func Branches(db *pachsql.DB, listener col.PostgresListener) col.PostgresCollection

Branches returns a collection of branches

func CommitKey

func CommitKey(commit *pfs.Commit) string

func CommitSetProvenance

func CommitSetProvenance(tx *pachsql.Tx, id string) (_ []*pfs.Commit, retErr error)

CommitSetProvenance returns all the commit IDs that are in the provenance of all the commits in this commit set.

TODO(provenance): is 'SELECT DISTINCT commit_id' a performance concern?

func CommitSetSubvenance

func CommitSetSubvenance(tx *pachsql.Tx, id string) (_ []*pfs.Commit, retErr error)

CommitSetSubvenance returns the IDs of all commits that have a commit from this commit set in their full (transitive) provenance.

func Commits

func Commits(db *pachsql.DB, listener col.PostgresListener) col.PostgresCollection

Commits returns a collection of commits

func CreateCommitAncestries added in v2.8.0

func CreateCommitAncestries(ctx context.Context, tx *pachsql.Tx, parentCommit CommitID, childrenCommits []CommitID) error

CreateCommitAncestries inserts ancestry relationships where the ids of both parent and children are known.

func CreateCommitChildren added in v2.8.0

func CreateCommitChildren(ctx context.Context, tx *pachsql.Tx, parentCommit CommitID, childCommits []*pfs.Commit) error

CreateCommitChildren inserts ancestry relationships using a single query for all of the children.

func CreateCommitParent added in v2.8.0

func CreateCommitParent(ctx context.Context, tx *pachsql.Tx, parentCommit *pfs.Commit, childCommit CommitID) error

CreateCommitParent inserts a single ancestry relationship where the child is known and parent must be derived.

func CreateDirectBranchProvenance added in v2.8.0

func CreateDirectBranchProvenance(ctx context.Context, tx *pachsql.Tx, from, to BranchID) error

CreateDirectBranchProvenance creates a direct provenance relationship between two branches.

func CreateDirectBranchProvenanceBatch

func CreateDirectBranchProvenanceBatch(ctx context.Context, tx *pachsql.Tx, from BranchID, tos []BranchID) error

CreateDirectBranchProvenanceBatch creates direct provenance relationships between a branch and a set of other branches.

func DeleteBranch added in v2.8.0

func DeleteBranch(ctx context.Context, tx *pachsql.Tx, id BranchID) error

DeleteBranch deletes a branch.

func DeleteBranchTrigger added in v2.8.0

func DeleteBranchTrigger(ctx context.Context, tx *pachsql.Tx, from BranchID) error

func DeleteCommit added in v2.8.0

func DeleteCommit(ctx context.Context, tx *pachsql.Tx, commit *pfs.Commit) error

DeleteCommit deletes an entry in the pfs.commits table. It also repoints the references in the commit_ancestry table. The caller is responsible for updating branches.

func DeleteDirectBranchProvenance

func DeleteDirectBranchProvenance(ctx context.Context, tx *pachsql.Tx, from, to BranchID) error

DeleteDirectBranchProvenance deletes a direct provenance relationship between two branches.

func DeleteDirectBranchProvenanceBatch

func DeleteDirectBranchProvenanceBatch(ctx context.Context, tx *pachsql.Tx, from BranchID, tos []BranchID) error

DeleteDirectBranchProvenanceBatch deletes direct provenance relationships between a branch and a set of other branches.

func DeleteRepo added in v2.8.0

func DeleteRepo(ctx context.Context, tx *pachsql.Tx, repoProject, repoName, repoType string) error

DeleteRepo deletes an entry in the pfs.repos table.

func GetBranchInfo added in v2.8.0

func GetBranchInfo(ctx context.Context, tx *pachsql.Tx, id BranchID) (*pfs.BranchInfo, error)

GetBranchInfo returns a *pfs.BranchInfo by id.

func GetBranchInfoByName

func GetBranchInfoByName(ctx context.Context, tx *pachsql.Tx, project, repo, repoType, branch string) (*pfs.BranchInfo, error)

GetBranchInfoByName returns a *pfs.BranchInfo by name

func GetBranchProvenance added in v2.8.0

func GetBranchProvenance(ctx context.Context, tx *pachsql.Tx, id BranchID) ([]*pfs.Branch, error)

GetBranchProvenance returns the full provenance of a branch, i.e. all branches that it either directly or transitively depends on.

func GetBranchSubvenance added in v2.8.0

func GetBranchSubvenance(ctx context.Context, tx *pachsql.Tx, id BranchID) ([]*pfs.Branch, error)

GetBranchSubvenance returns the full subvenance of a branch, i.e. all branches that either directly or transitively depend on it.

func GetBranchTrigger added in v2.8.0

func GetBranchTrigger(ctx context.Context, tx *pachsql.Tx, from BranchID) (*pfs.Trigger, error)

func GetCommit added in v2.8.0

func GetCommit(ctx context.Context, tx *pachsql.Tx, id CommitID) (*pfs.CommitInfo, error)

GetCommit returns the commitInfo where int_id=id.

func GetCommitByCommitKey added in v2.8.0

func GetCommitByCommitKey(ctx context.Context, tx *pachsql.Tx, commit *pfs.Commit) (*pfs.CommitInfo, error)

GetCommitByCommitKey is like GetCommit but derives the int_id on behalf of the caller.

func GetCommitChildren added in v2.8.0

func GetCommitChildren(ctx context.Context, tx *pachsql.Tx, parentCommit CommitID) ([]*pfs.Commit, error)

GetCommitChildren uses the pfs.commit_ancestry and pfs.commits tables to retrieve commits of all of the children given an int_id of the parent.

func GetCommitParent added in v2.8.0

func GetCommitParent(ctx context.Context, tx *pachsql.Tx, childCommit CommitID) (*pfs.Commit, error)

GetCommitParent uses the pfs.commit_ancestry and pfs.commits tables to retrieve a commit given an int_id of one of its children.

func GetDirectBranchProvenance added in v2.8.0

func GetDirectBranchProvenance(ctx context.Context, tx *pachsql.Tx, id BranchID) ([]*pfs.Branch, error)

GetDirectBranchProvenance returns the direct provenance of a branch, i.e. all branches that it directly depends on.

func GetRepo added in v2.8.0

func GetRepo(ctx context.Context, tx *pachsql.Tx, id RepoID) (*pfs.RepoInfo, error)

GetRepo retrieves an entry from the pfs.repos table by using the row id. TODO(fahad): rewrite branch-related code during the branches migration.

func GetRepoByName added in v2.8.0

func GetRepoByName(ctx context.Context, tx *pachsql.Tx, repoProject, repoName, repoType string) (*pfs.RepoInfo, error)

GetRepoByName retrieves an entry from the pfs.repos table by project, repo name, and type.

func IsChildCommitNotFound added in v2.8.0

func IsChildCommitNotFound(err error) bool

func IsDuplicateKeyErr added in v2.8.0

func IsDuplicateKeyErr(err error) bool

func IsErrRepoNotFound added in v2.8.0

func IsErrRepoNotFound(err error) bool

func IsParentCommitNotFound added in v2.8.0

func IsParentCommitNotFound(err error) bool

func OrderByQuery added in v2.8.0

func OrderByQuery[T ColumnName](orderBys ...OrderByColumn[T]) string

func ParseBranch added in v2.6.6

func ParseBranch(key string) *pfs.Branch

func ParseCommit

func ParseCommit(key string) *pfs.Commit

func ParseRepo

func ParseRepo(key string) *pfs.Repo

func ProjectKey

func ProjectKey(project *pfs.Project) string

func RepoKey

func RepoKey(repo *pfs.Repo) string

func ResolveCommitProvenance

func ResolveCommitProvenance(tx *pachsql.Tx, repo *pfs.Repo, commitSet string) (*pfs.Commit, error)

ResolveCommitProvenance returns the commit of a certain repo in a commit set.

func SliceDiff

func SliceDiff[K comparable, V any](a, b []V, key func(V) K) []V

SliceDiff takes two slices and returns the elements in the first slice that are not in the second slice. TODO this can be moved to a more generic package.

func UpdateCommit added in v2.8.0

func UpdateCommit(ctx context.Context, tx *pachsql.Tx, id CommitID, commitInfo *pfs.CommitInfo, opts ...AncestryOpt) error

UpdateCommit overwrites an existing commit entry by CommitID as well as the corresponding ancestry entries.

func UpsertBranchTrigger added in v2.8.0

func UpsertBranchTrigger(ctx context.Context, tx *pachsql.Tx, from BranchID, to BranchID, trigger *pfs.Trigger) error

Types

type AncestryOpt added in v2.8.0

type AncestryOpt struct {
	SkipChildren bool
	SkipParent   bool
}

AncestryOpt allows users to create commitInfos and skip creating the ancestry information. This allows a user to create the commits in an arbitrary order, then create their ancestry later.

type Branch added in v2.8.0

type Branch struct {
	ID   BranchID `db:"id"`
	Head Commit   `db:"head"`
	Repo Repo     `db:"repo"`
	Name string   `db:"name"`
	CreatedAtUpdatedAt
}

Branch is a row in the pfs.branches table.

func (*Branch) Pb added in v2.8.0

func (branch *Branch) Pb() *pfs.Branch

type BranchID added in v2.8.0

type BranchID uint64

BranchID is the row id for a branch entry in postgres.

func GetBranchID added in v2.8.0

func GetBranchID(ctx context.Context, tx *pachsql.Tx, branch *pfs.Branch) (BranchID, error)

GetBranchID returns the id of a branch given a set of strings that uniquely identify a branch.

func UpsertBranch added in v2.8.0

func UpsertBranch(ctx context.Context, tx *pachsql.Tx, branchInfo *pfs.BranchInfo) (BranchID, error)

UpsertBranch creates a branch if it does not exist, or updates the head if the branch already exists. If direct provenance is specified, it will be used to update the branch's provenance relationships.

type BranchInfoWithID added in v2.8.0

type BranchInfoWithID struct {
	ID BranchID
	*pfs.BranchInfo
}

type BranchIterator added in v2.8.0

type BranchIterator struct {
	// contains filtered or unexported fields
}

func NewBranchIterator added in v2.8.0

func NewBranchIterator(ctx context.Context, tx *pachsql.Tx, startPage, pageSize uint64, filter *pfs.Branch, orderBys ...OrderByBranchColumn) (*BranchIterator, error)

func (*BranchIterator) Next added in v2.8.0

type BranchTrigger added in v2.8.0

type BranchTrigger struct {
	FromBranch    Branch `db:"from_branch"`
	ToBranch      Branch `db:"to_branch"`
	CronSpec      string `db:"cron_spec"`
	RateLimitSpec string `db:"rate_limit_spec"`
	Size          string `db:"size"`
	NumCommits    int64  `db:"num_commits"`
	AllConditions bool   `db:"all_conditions"`
}

func (*BranchTrigger) Pb added in v2.8.0

func (trigger *BranchTrigger) Pb() *pfs.Trigger

type ColumnName added in v2.8.0

type ColumnName interface{ string | branchColumn }

type Commit added in v2.8.0

type Commit struct {
	ID             CommitID  `db:"int_id"`
	CommitSetID    string    `db:"commit_set_id"`
	CommitID       string    `db:"commit_id"`
	Origin         string    `db:"origin"`
	Description    string    `db:"description"`
	StartTime      time.Time `db:"start_time"`
	FinishingTime  time.Time `db:"finishing_time"`
	FinishedTime   time.Time `db:"finished_time"`
	CompactingTime int64     `db:"compacting_time_s"`
	ValidatingTime int64     `db:"validating_time_s"`
	Error          string    `db:"error"`
	Size           int64     `db:"size"`
	// BranchName is used to derive the BranchID in commit related queries.
	BranchName sql.NullString `db:"branch_name"`
	BranchID   sql.NullInt64  `db:"branch_id"`
	Repo       Repo           `db:"repo"`
	CreatedAtUpdatedAt
}

func (*Commit) Pb added in v2.8.0

func (commit *Commit) Pb() *pfs.Commit

type CommitFields

type CommitFields string

CommitFields is used in the ListCommitFilter and defines specific field names for type safety. This should hopefully prevent a library user from misconfiguring the filter.

type CommitID added in v2.8.0

type CommitID uint64

CommitID is the row id for a commit entry in postgres.

func CreateCommit added in v2.8.0

func CreateCommit(ctx context.Context, tx *pachsql.Tx, commitInfo *pfs.CommitInfo, opts ...AncestryOpt) (CommitID, error)

CreateCommit creates an entry in the pfs.commits table. If the commit has a parent or children, it will attempt to create entries in the pfs.commit_ancestry table unless options are provided to skip ancestry creation.

func GetCommitID added in v2.8.0

func GetCommitID(ctx context.Context, tx *pachsql.Tx, commit *pfs.Commit) (CommitID, error)

GetCommitID returns the int_id of a commit in postgres.

func UpsertCommit added in v2.8.0

func UpsertCommit(ctx context.Context, tx *pachsql.Tx, commitInfo *pfs.CommitInfo, opts ...AncestryOpt) (CommitID, error)

UpsertCommit will attempt to insert a commit and its ancestry relationships. If the commit already exists, it will update its description.

type CommitIterator added in v2.8.0

type CommitIterator struct {
	// contains filtered or unexported fields
}

CommitIterator batches a page of Commit entries along with their parent and children. (id, entry) tuples can be retrieved using iter.Next().

func ListCommit

func ListCommit(ctx context.Context, db *pachsql.DB, filter CommitListFilter, rev bool) (*CommitIterator, error)

ListCommit returns a CommitIterator that exposes a Next() function for retrieving *pfs.CommitInfo references. It manages transactions on behalf of its user under the hood.

func (*CommitIterator) Next added in v2.8.0

func (iter *CommitIterator) Next(ctx context.Context, dst *CommitPair) error

Next advances the iterator by one row. It returns a stream.EOS when there are no more entries. The iterator prefetches the parents and children of the buffered commits until it hits an internal capacity.

type CommitListFilter

type CommitListFilter map[CommitFields][]string

CommitListFilter is a filter for listing commits. It ANDs together separate keys, but ORs together the values of each key: where commit.<key_1> IN (<key_1:value_1>, <key_1:value_2>, ...) AND commit.<key_2> IN (<key_2:value_1>, <key_2:value_2>, ...)

type CommitPair

type CommitPair struct {
	ID         CommitID
	CommitInfo *pfs.CommitInfo
}

CommitPair is an (id, commitInfo) tuple returned by the commit iterator.

type CreatedAtUpdatedAt added in v2.8.0

type CreatedAtUpdatedAt struct {
	CreatedAt time.Time `db:"created_at"`
	UpdatedAt time.Time `db:"updated_at"`
}

type ErrChildCommitNotFound

type ErrChildCommitNotFound struct {
	Repo           string
	ParentRowID    CommitID
	ParentCommitID string
}

ErrChildCommitNotFound is returned when a commit's child is not found in postgres.

func (ErrChildCommitNotFound) Error

func (err ErrChildCommitNotFound) Error() string

func (ErrChildCommitNotFound) GRPCStatus

func (err ErrChildCommitNotFound) GRPCStatus() *status.Status

type ErrCommitAlreadyExists

type ErrCommitAlreadyExists struct {
	CommitID string
}

ErrCommitAlreadyExists is returned when a commit with the same name already exists in postgres.

func (ErrCommitAlreadyExists) Error

func (err ErrCommitAlreadyExists) Error() string

Error satisfies the error interface.

func (ErrCommitAlreadyExists) GRPCStatus

func (err ErrCommitAlreadyExists) GRPCStatus() *status.Status

type ErrCommitMissingInfo

type ErrCommitMissingInfo struct {
	Field string
}

ErrCommitMissingInfo is returned when a commitInfo is missing a field.

func (ErrCommitMissingInfo) Error

func (err ErrCommitMissingInfo) Error() string

func (ErrCommitMissingInfo) GRPCStatus

func (err ErrCommitMissingInfo) GRPCStatus() *status.Status

type ErrCommitNotFound

type ErrCommitNotFound struct {
	RowID    CommitID
	CommitID string
}

ErrCommitNotFound is returned by GetCommit() when a commit is not found in postgres.

func (ErrCommitNotFound) Error

func (err ErrCommitNotFound) Error() string

func (ErrCommitNotFound) GRPCStatus

func (err ErrCommitNotFound) GRPCStatus() *status.Status

type ErrParentCommitNotFound

type ErrParentCommitNotFound struct {
	ChildRowID    CommitID
	ChildCommitID string
}

ErrParentCommitNotFound is returned when a commit's parent is not found in postgres.

func (ErrParentCommitNotFound) Error

func (err ErrParentCommitNotFound) Error() string

func (ErrParentCommitNotFound) GRPCStatus

func (err ErrParentCommitNotFound) GRPCStatus() *status.Status

type ErrRepoNotFound

type ErrRepoNotFound struct {
	Project string
	Name    string
	Type    string
	ID      RepoID
}

ErrRepoNotFound is returned by GetRepo() when a repo is not found in postgres.

func (ErrRepoNotFound) Error

func (err ErrRepoNotFound) Error() string

Error satisfies the error interface.

func (ErrRepoNotFound) GRPCStatus

func (err ErrRepoNotFound) GRPCStatus() *status.Status

type ModelType added in v2.8.0

type ModelType interface{ Repo | Commit | Branch }

type OrderByBranchColumn added in v2.8.0

type OrderByBranchColumn OrderByColumn[branchColumn]

type OrderByColumn added in v2.8.0

type OrderByColumn[T ColumnName] struct {
	Column T
	Order  sortOrder
}

type Repo added in v2.8.0

type Repo struct {
	ID          RepoID         `db:"id"`
	Project     coredb.Project `db:"project"`
	Name        string         `db:"name"`
	Type        string         `db:"type"`
	Description string         `db:"description"`
	CreatedAtUpdatedAt

	// Branches is a string that contains an array of hex-encoded branchInfos. The array is enclosed with curly braces.
	// Each entry is prefixed with '//x' and entries are delimited by a ','
	Branches string `db:"branches"`
}

Repo is a row in the pfs.repos table.

func (*Repo) Pb added in v2.8.0

func (repo *Repo) Pb() *pfs.Repo

func (*Repo) PbInfo added in v2.8.0

func (repo *Repo) PbInfo() (*pfs.RepoInfo, error)

type RepoFields

type RepoFields string

RepoFields is used in the ListRepoFilter and defines specific field names for type safety. This should hopefully prevent a library user from misconfiguring the filter.

type RepoID added in v2.8.0

type RepoID uint64

RepoID is the row id for a repo entry in postgres. A separate type is defined for safety so row ids must be explicitly cast for use in another table.

func GetRepoID added in v2.8.0

func GetRepoID(ctx context.Context, tx *pachsql.Tx, repoProject, repoName, repoType string) (RepoID, error)

func UpsertRepo added in v2.8.0

func UpsertRepo(ctx context.Context, tx *pachsql.Tx, repo *pfs.RepoInfo) (RepoID, error)

UpsertRepo will attempt to insert a repo, and return its ID. If the repo already exists, it will update its description.

type RepoIterator added in v2.8.0

type RepoIterator struct {
	// contains filtered or unexported fields
}

RepoIterator batches a page of Repo entries. Entries can be retrieved using iter.Next().

func ListRepo added in v2.8.0

func ListRepo(ctx context.Context, tx *pachsql.Tx, filter RepoListFilter) (*RepoIterator, error)

ListRepo returns a RepoIterator that exposes a Next() function for retrieving *pfs.RepoInfo references.

func (*RepoIterator) Next added in v2.8.0

func (iter *RepoIterator) Next(ctx context.Context, dst *RepoPair) error

Next advances the iterator by one row. It returns a stream.EOS when there are no more entries.

type RepoListFilter

type RepoListFilter map[RepoFields][]string

RepoListFilter is a filter for listing repos. It ANDs together separate keys, but ORs together the values of each key: where repo.<key_1> IN (<key_1:value_1>, <key_1:value_2>, ...) AND repo.<key_2> IN (<key_2:value_1>, <key_2:value_2>, ...)

type RepoPair

type RepoPair struct {
	ID       RepoID
	RepoInfo *pfs.RepoInfo
}

RepoPair is an (id, repoInfo) tuple returned by the repo iterator.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL