dadsgha

package module
v0.0.0-...-edc1889 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 15, 2021 License: Apache-2.0 Imports: 16 Imported by: 1

README

da-ds-gha

Dev Analytics data Source GitHub Archives

Usage:

  • Example: GHA_HOUR_FROM=9 GHA_HOUR_TO=9 GHA_GITHUB_OAUTH="cat /etc/github/oauths" ./dadsgha ~/dev/LF-Engineering/dev-analytics-api/app/services/lf/bootstrap/fixtures.

Documentation

Index

Constants

View Source
// GitHubOrg - the string "github_org".
// RawEndpoint's documentation lists "github_org" as a possible "type" flag value;
// presumably this constant is what that flag is compared against - confirm.
const GitHubOrg string = "github_org"

GitHubOrg - github_org

View Source
// GitHubUser - the string "github_user".
// RawEndpoint's documentation lists "github_user" as a possible "type" flag value;
// presumably this constant is what that flag is compared against - confirm.
const GitHubUser string = "github_user"

GitHubUser - github_user

View Source
// Now - common constant string "now".
const Now string = "now"

Now - common constant string

View Source
// OK - common constant string "ok".
const OK string = "ok"

OK - common constant string

View Source
// Today - common constant string "today".
const Today string = "today"

Today - common constant string

Variables

This section is empty.

Functions

func DayStart

func DayStart(dt time.Time) time.Time

DayStart - return time rounded to current day start

func EndpointIncluded

func EndpointIncluded(ctx *Ctx, ep *RawEndpoint, origin string) (bool, int)

EndpointIncluded - checks if given endpoint's origin should be included or excluded based on endpoint's skip/only regular expressions lists. First return value specifies if endpoint is included or not. Second value specifies: 1 - included by 'only' condition, 2 - skipped by 'skip' condition.

func ExecCommand

func ExecCommand(ctx *Ctx, cmdAndArgs []string, env map[string]string, tmout *time.Duration) (string, error)

ExecCommand - execute command given by array of strings with eventual environment map

func FatalOnError

func FatalOnError(err error) string

FatalOnError displays error message (if error present) and exits program

func Fatalf

func Fatalf(f string, a ...interface{})

Fatalf - it will call FatalOnError using fmt.Errorf with args provided

func GHClient

func GHClient(ctx *Ctx) (ghCtx context.Context, clients []*github.Client)

GHClient - get GitHub client

func GetFixtures

func GetFixtures(ctx *Ctx, path string) (fixtures []string)

GetFixtures - read all fixture files

func GetRateLimits

func GetRateLimits(gctx context.Context, ctx *Ctx, gcs []*github.Client, core bool) (int, []int, []int, []time.Duration)

GetRateLimits - returns all and remaining API points and duration to wait for reset. When core=true it returns Core limits, when core=false it returns Search limits.

func GetThreadsNum

func GetThreadsNum(ctx *Ctx) int

GetThreadsNum returns the number of available CPUs. If environment variable GHA_ST is set it returns 1. It can be used to debug single threaded version.

func HourStart

func HourStart(dt time.Time) time.Time

HourStart - return time rounded to current hour start

func MakeOldRepoName

func MakeOldRepoName(repo *ForkeeOld) string

MakeOldRepoName - before 2015 repository name should be Organization/Name (if Organization present) or just Name

func MonthStart

func MonthStart(dt time.Time) time.Time

MonthStart - return time rounded to current month start

func NDaysStart

func NDaysStart(dt time.Time, nDays int) time.Time

NDaysStart - return time rounded to current n-days start

func NextDayStart

func NextDayStart(dt time.Time) time.Time

NextDayStart - return time rounded to next day start

func NextHourStart

func NextHourStart(dt time.Time) time.Time

NextHourStart - return time rounded to next hour start

func NextMonthStart

func NextMonthStart(dt time.Time) time.Time

NextMonthStart - return time rounded to next month start

func NextNDaysStart

func NextNDaysStart(dt time.Time, nDays int) time.Time

NextNDaysStart - return time rounded to next n-days start

func NextQuarterStart

func NextQuarterStart(dt time.Time) time.Time

NextQuarterStart - return time rounded to next quarter start

func NextWeekStart

func NextWeekStart(dt time.Time) time.Time

NextWeekStart - return time rounded to next week start

func NextYearStart

func NextYearStart(dt time.Time) time.Time

NextYearStart - return time rounded to next year start

func ParseGHAString

func ParseGHAString(dtStr string) time.Time

ParseGHAString - Parse GHA date in 2020121514 format

func PeriodParse

func PeriodParse(perStr string) (dur time.Duration, ok bool)

PeriodParse - tries to parse period

func PrettyPrintJSON

func PrettyPrintJSON(jsonBytes []byte) []byte

PrettyPrintJSON - pretty formats raw JSON bytes

func PrevDayStart

func PrevDayStart(dt time.Time) time.Time

PrevDayStart - return time rounded to prev day start

func PrevHourStart

func PrevHourStart(dt time.Time) time.Time

PrevHourStart - return time rounded to prev hour start

func PrevMonthStart

func PrevMonthStart(dt time.Time) time.Time

PrevMonthStart - return time rounded to prev month start

func PrevNDaysStart

func PrevNDaysStart(dt time.Time, nDays int) time.Time

PrevNDaysStart - return time rounded to prev n-days start

func PrevQuarterStart

func PrevQuarterStart(dt time.Time) time.Time

PrevQuarterStart - return time rounded to prev quarter start

func PrevWeekStart

func PrevWeekStart(dt time.Time) time.Time

PrevWeekStart - return time rounded to prev week start

func PrevYearStart

func PrevYearStart(dt time.Time) time.Time

PrevYearStart - return time rounded to prev year start

func Printf

func Printf(format string, args ...interface{}) (n int, err error)

Printf is a wrapper around Printf(...) that supports logging.

func ProgressInfo

func ProgressInfo(i, n int, start time.Time, last *time.Time, period time.Duration, msg string)

ProgressInfo displays info about progress: i/n, if current time >= last + period. If info was displayed, it updates last.

func QuarterStart

func QuarterStart(dt time.Time) time.Time

QuarterStart - return time rounded to current quarter start

func TimeParseAny

func TimeParseAny(dtStr string) time.Time

TimeParseAny - attempts to parse time from string YYYY-MM-DD HH:MI:SS. Skipping parts from right until only YYYY is left.

func TimeParseES

func TimeParseES(dtStr string) (time.Time, error)

TimeParseES - parse ES time format

func ToGHADate

func ToGHADate(dt time.Time) string

ToGHADate - return time formatted as YYYY-MM-DD-H

func ToGHADate2

func ToGHADate2(dt time.Time) string

ToGHADate2 - return time formatted as YYYY-MM-DD-HH

func ToPeriodDate

func ToPeriodDate(dt time.Time, nDays int) string

ToPeriodDate - return time formatted as N-YYYYMMDD

func ToYMDDate

func ToYMDDate(dt time.Time) string

ToYMDDate - return time formatted as YYYY-MM-DD

func ToYMDHMSDate

func ToYMDHMSDate(dt time.Time) string

ToYMDHMSDate - return time formatted as YYYY-MM-DD HH:MI:SS

func ToYMDate

func ToYMDate(dt time.Time) string

ToYMDate - return time formatted as YYYYMM

func WeekStart

func WeekStart(dt time.Time) time.Time

WeekStart - return time rounded to current week start. Assumes first week day is Sunday.

func YearStart

func YearStart(dt time.Time) time.Time

YearStart - return time rounded to current year start

Types

type Actor

// Actor - GHA Actor structure.
// ID and Login are mapped to the JSON keys "id" and "login".
// Name is an exported field, but its `json:"-"` tag excludes it from JSON load/save.
type Actor struct {
	ID    int    `json:"id"`
	Login string `json:"login"`
	Name  string `json:"-"`
}

Actor - GHA Actor structure. Name is excluded from JSON load/save (it is tagged `json:"-"`), but it is used when importing affiliations from cncf/gitdm:github_users.json.

type Asset

// Asset - GHA Asset structure (used as the element type of Release.Assets).
// Label is a pointer, so a JSON null/absent "label" decodes to nil.
type Asset struct {
	ID            int       `json:"id"`
	CreatedAt     time.Time `json:"created_at"`
	UpdatedAt     time.Time `json:"updated_at"`
	Name          string    `json:"name"`
	Label         *string   `json:"label"`
	Uploader      Actor     `json:"uploader"`
	ContentType   string    `json:"content_type"`
	State         string    `json:"state"`
	Size          int       `json:"size"`
	DownloadCount int       `json:"download_count"`
}

Asset - GHA Asset structure

type Author

// Author - GHA Commit Author structure (used by Commit.Author).
// Holds only the author's name and email as found under the JSON keys "name" and "email".
type Author struct {
	Name  string `json:"name"`
	Email string `json:"email"`
}

Author - GHA Commit Author structure

type Branch

// Branch - GHA Branch structure (used for PullRequest.Base and PullRequest.Head).
// User and Repo are pointers, so a JSON null/absent value decodes to nil.
type Branch struct {
	SHA   string  `json:"sha"`
	User  *Actor  `json:"user"`
	Repo  *Forkee `json:"repo"` // This is confusing, but actually GHA has "repo" fields that holds "forkee" structure
	Label string  `json:"label"`
	Ref   string  `json:"ref"`
}

Branch - GHA Branch structure

type Comment

// Comment - GHA Comment structure.
// ID, Body, CreatedAt, UpdatedAt and User are plain values; every other field is a
// pointer and is nil when the JSON value is null or absent.
// NOTE(review): the pointer fields (commit, diff hunk, position, path, review ID, line)
// look like they only apply to commit/review comments - confirm.
type Comment struct {
	ID                  int       `json:"id"`
	Body                string    `json:"body"`
	CreatedAt           time.Time `json:"created_at"`
	UpdatedAt           time.Time `json:"updated_at"`
	User                Actor     `json:"user"`
	CommitID            *string   `json:"commit_id"`
	OriginalCommitID    *string   `json:"original_commit_id"`
	DiffHunk            *string   `json:"diff_hunk"`
	Position            *int      `json:"position"`
	OriginalPosition    *int      `json:"original_position"`
	Path                *string   `json:"path"`
	PullRequestReviewID *int      `json:"pull_request_review_id"`
	Line                *int      `json:"line"`
}

Comment - GHA Comment structure

type Commit

// Commit - GHA Commit structure (element type of Payload.Commits).
// Carries the commit SHA, its Author (name/email), the message and the "distinct" flag
// exactly as found in the JSON.
type Commit struct {
	SHA      string `json:"sha"`
	Author   Author `json:"author"`
	Message  string `json:"message"`
	Distinct bool   `json:"distinct"`
}

Commit - GHA Commit structure

type Ctx

// Ctx - environment context packed in structure.
// Most fields are documented as coming from GHA_* environment variables; the Exec*
// fields, TestMode and OAuthKeys are documented as being set manually or at runtime.
type Ctx struct {
	Debug                 int      // From GHA_DEBUG Debug level: 0-no, 1-info, 2-verbose
	CmdDebug              int      // From GHA_CMDDEBUG Commands execution Debug level: 0-no, 1-only output commands, 2-output commands and their output, 3-output full environment as well, default 0
	ST                    bool     // From GHA_ST true: use single threaded version, false: use multi threaded version, default false
	NCPUs                 int      // From GHA_NCPUS, set to override number of CPUs to run, this overwrites GHA_ST, default 0 (which means do not use it, use all CPU reported by go library)
	NCPUsScale            float64  // From GHA_NCPUS_SCALE, scale number of CPUs, for example 2.0 will report twice the number of actually available CPUs
	ExecFatal             bool     // default true, set this manually to false to avoid lib.ExecCommand calling os.Exit() on failure and return error instead
	ExecQuiet             bool     // default false, set this manually to true to have quiet exec failures
	ExecOutput            bool     // default false, set to true to capture commands STDOUT
	ExecOutputStderr      bool     // default false, set to true to capture commands STDERR (NOTE(review): the comment previously said STDOUT, which looks like a copy-paste slip - confirm)
	GitHubOAuth           string   // From GHA_GITHUB_OAUTH, if not set it attempts to use public access, if contains "/" it will assume that it contains file name, if "," found then it will assume that this is a list of OAuth tokens instead of just one
	ESURL                 string   // From GHA_ES_URL - ElasticSearch URL
	ESBulkSize            int      // From GHA_ES_BULK_SIZE, bulk upload size, default 1000
	LoadConfig            bool     // From GHA_LOAD_CONFIG, if set - it will load configuration instead of reading all fixtures
	SaveConfig            bool     // From GHA_SAVE_CONFIG, if set - it will save configuration in a JSON file
	NoIncremental         bool     // From GHA_NO_INCREMENTAL, if set - it will not attempt to detect fixture changes since last run and will treat all fixtures as new and detect the start date everywhere
	NoGHAMap              bool     // From GHA_NO_GHA_MAP, if set - it will not use any GHA map files (which are very memory consuming)
	NoGHARepoDates        bool     // From GHA_NO_GHA_REPO_DATES, if set, it will skip GHA repo dates processing (file is huge, requires around 30G of memory), GHA map files can still be processed
	NoAffiliation         bool     // From GHA_NO_AFFILIATION, if set, no enrollments affiliations will be processed
	ConfigFile            string   // From GHA_CONFIG_FILE, configuration save/load file (root name), default "gha_config/" (gha_config/fixtures.json, gha_config/dates.json, gha_config/repos.json)
	GapURL                string   // From GHA_GAP_URL, address of the GAP API
	MaxParallelSHAs       int      // From GHA_MAX_PARALLEL_SHAS, maximum number of GHA repo dates SHA files to process in parallel, setting to 0 means unlimited (basically NCPUS)
	MaxParallelAPIReviews int      // From GHA_MAX_PARALLEL_API_REVIEWS, maximum number of GitHub PR Reviews API calls, setting to 0 means unlimited (basically NCPUS)
	MaxJSONsBytes         int64    // From GHA_MAX_JSONS_GBYTES, when processing multiple GHA hours in parallel, single hour uncompressed can even be 800M, when you have say 64 CPUs then you can reserve 50+ G, so you can specify limit in Gb, default is 0 = no limit
	MemHeartBeatBytes     int64    // From GHA_MEM_HEARTBEAT_GBYTES, display memory heartbeat warning when exceeding this limit (heartbeat happens every 15s), default is 0 - not to display any warnings.
	TestMode              bool     // True when running tests
	OAuthKeys             []string // GitHub oauth keys received from GHA_GITHUB_OAUTH configuration (initialized only when lib.GHClient() is called)
}

Ctx - environment context packed in structure

func (*Ctx) Init

func (ctx *Ctx) Init()

Init - get context from environment variables

func (*Ctx) Print

func (ctx *Ctx) Print()

Print context contents

type DataSource

// DataSource contains data source spec from dev-analytics-api.
// It is decoded from YAML: a slug, a list of projects, two endpoint lists
// ("endpoints" and "historical_endpoints") and an index suffix.
type DataSource struct {
	Slug          string        `yaml:"slug"`
	Projects      []Project     `yaml:"projects"`
	RawEndpoints  []RawEndpoint `yaml:"endpoints"`
	HistEndpoints []RawEndpoint `yaml:"historical_endpoints"`
	IndexSuffix   string        `yaml:"index_suffix"`
}

DataSource contains data source spec from dev-analytics-api

type Event

// Event - full GHA (GitHub Archive) event structure.
// Org is a pointer, so a JSON null/absent "org" decodes to nil.
// The GHA* fields are not part of the archive format: per the inline comment
// they are extra fields added by dadsgha.
type Event struct {
	ID        string    `json:"id"`
	Type      string    `json:"type"`
	Public    bool      `json:"public"`
	CreatedAt time.Time `json:"created_at"`
	Actor     Actor     `json:"actor"`
	Repo      Repo      `json:"repo"`
	Org       *Org      `json:"org"`
	Payload   Payload   `json:"payload"`
	// Extra fields added by dadsgha
	GHADt      time.Time         `json:"gha_hour"`
	GHAFxSlug  string            `json:"gha_fixture_slug"`
	GHAProj    string            `json:"gha_project"`
	GHASuffMap map[string]string `json:"gha_suff_map"`
}

Event - full GHA (GitHub Archive) event structure

type EventOld

// EventOld - full GHA (GitHub Archive) event structure, before 2015.
// Differences from Event visible in the declaration:
//   - ID is tagged `json:"-"`, so it is never read from or written to JSON;
//   - Actor is a plain string rather than an Actor structure;
//   - the repository is a ForkeeOld under the JSON key "repository";
//   - Payload is a pointer (nil when null/absent) to PayloadOld.
type EventOld struct {
	ID         string      `json:"-"`
	Type       string      `json:"type"`
	Public     bool        `json:"public"`
	CreatedAt  time.Time   `json:"created_at"`
	Actor      string      `json:"actor"`
	Repository ForkeeOld   `json:"repository"`
	Payload    *PayloadOld `json:"payload"`
	// Extra fields added by dadsgha
	GHADt      time.Time         `json:"gha_hour"`
	GHAFxSlug  string            `json:"gha_fixture_slug"`
	GHAProj    string            `json:"gha_project"`
	GHASuffMap map[string]string `json:"gha_suff_map"`
}

EventOld - full GHA (GitHub Archive) event structure, before 2015

type Fixture

// Fixture contains full YAML structure of dev-analytics-api fixture files:
// a "disabled" flag, the "native" section and the list of "data_sources".
type Fixture struct {
	Disabled    bool         `yaml:"disabled"`
	Native      Native       `yaml:"native"`
	DataSources []DataSource `yaml:"data_sources"`
}

Fixture contains full YAML structure of dev-analytics-api fixture files

type Forkee

// Forkee - GHA Forkee structure (a full repository description).
// It is also used for Branch.Repo and PayloadOld.Repository.
// Pointer fields (Description, Public, PushedAt, Homepage, HasProjects, HasPages)
// are nil when the JSON value is null or absent.
type Forkee struct {
	ID              int        `json:"id"`
	Name            string     `json:"name"`
	FullName        string     `json:"full_name"`
	Owner           Actor      `json:"owner"`
	Description     *string    `json:"description"`
	Public          *bool      `json:"public"`
	Fork            bool       `json:"fork"`
	CreatedAt       time.Time  `json:"created_at"`
	UpdatedAt       time.Time  `json:"updated_at"`
	PushedAt        *time.Time `json:"pushed_at"`
	Homepage        *string    `json:"homepage"`
	Size            int        `json:"size"`
	StargazersCount int        `json:"stargazers_count"`
	HasIssues       bool       `json:"has_issues"`
	HasProjects     *bool      `json:"has_projects"`
	HasDownloads    bool       `json:"has_downloads"`
	HasWiki         bool       `json:"has_wiki"`
	HasPages        *bool      `json:"has_pages"`
	Forks           int        `json:"forks"`
	OpenIssues      int        `json:"open_issues"`
	Watchers        int        `json:"watchers"`
	DefaultBranch   string     `json:"default_branch"`
}

Forkee - GHA Forkee structure

type ForkeeOld

// ForkeeOld - GHA Forkee structure (from before 2015).
// Differences from Forkee visible in the declaration:
//   - DefaultBranch is read from the JSON key "master_branch";
//   - Owner is a plain string and Organization is an optional string;
//   - visibility is stored as Private (optional) instead of Public;
//   - the star count is Stargazers under the JSON key "stargazers".
type ForkeeOld struct {
	ID            int        `json:"id"`
	CreatedAt     time.Time  `json:"created_at"`
	Description   *string    `json:"description"`
	Fork          bool       `json:"fork"`
	Forks         int        `json:"forks"`
	HasDownloads  bool       `json:"has_downloads"`
	HasIssues     bool       `json:"has_issues"`
	HasWiki       bool       `json:"has_wiki"`
	Homepage      *string    `json:"homepage"`
	Language      *string    `json:"language"`
	DefaultBranch string     `json:"master_branch"`
	Name          string     `json:"name"`
	OpenIssues    int        `json:"open_issues"`
	Organization  *string    `json:"organization"`
	Owner         string     `json:"owner"`
	Private       *bool      `json:"private"`
	PushedAt      *time.Time `json:"pushed_at"`
	Size          int        `json:"size"`
	Stargazers    int        `json:"stargazers"`
	Watchers      int        `json:"watchers"`
}

ForkeeOld - GHA Forkee structure (from before 2015) Handle missing 4 last properties (including two non-nulls!)

type Issue

// Issue - GHA Issue structure.
// Body, Assignee, Milestone and ClosedAt are pointers and are nil when the JSON
// value is null or absent.
// PullRequest is a pointer to an empty struct: none of the JSON object's contents
// are kept, only whether a "pull_request" object was present (non-nil) or not.
// NOTE(review): presumably used to tell whether the issue is a pull request - confirm.
type Issue struct {
	ID          int        `json:"id"`
	Number      int        `json:"number"`
	Comments    int        `json:"comments"`
	Title       string     `json:"title"`
	State       string     `json:"state"`
	Locked      bool       `json:"locked"`
	Body        *string    `json:"body"`
	User        Actor      `json:"user"`
	Assignee    *Actor     `json:"assignee"`
	Labels      []Label    `json:"labels"`
	Assignees   []Actor    `json:"assignees"`
	Milestone   *Milestone `json:"milestone"`
	CreatedAt   time.Time  `json:"created_at"`
	UpdatedAt   time.Time  `json:"updated_at"`
	ClosedAt    *time.Time `json:"closed_at"`
	PullRequest *struct{}  `json:"pull_request"`
}

Issue - GHA Issue structure

type Label

// Label - GHA Label structure (element type of Issue.Labels).
// ID and Default are pointers and are nil when the JSON value is null or absent.
type Label struct {
	ID      *int   `json:"id"`
	Name    string `json:"name"`
	Color   string `json:"color"`
	Default *bool  `json:"default"`
}

Label - GHA Label structure

type Milestone

// Milestone - GHA Milestone structure (used by Issue.Milestone and PullRequest.Milestone).
// Description, Creator, ClosedAt and DueOn are pointers and are nil when the JSON
// value is null or absent.
type Milestone struct {
	ID           int        `json:"id"`
	Name         string     `json:"name"`
	Number       int        `json:"number"`
	Title        string     `json:"title"`
	Description  *string    `json:"description"`
	Creator      *Actor     `json:"creator"`
	OpenIssues   int        `json:"open_issues"`
	ClosedIssues int        `json:"closed_issues"`
	State        string     `json:"state"`
	CreatedAt    time.Time  `json:"created_at"`
	UpdatedAt    time.Time  `json:"updated_at"`
	ClosedAt     *time.Time `json:"closed_at"`
	DueOn        *time.Time `json:"due_on"`
}

Milestone - GHA Milestone structure

type Native

// Native - keeps the fixture slug (YAML key "slug" of the fixture's "native" section).
// Only Slug is declared here; no other key of that section is decoded.
type Native struct {
	Slug string `yaml:"slug"`
}

Native - keeps fixture slug and eventual global affiliation source

type Org

// Org - GHA Org structure (used by Event.Org): organization ID and login.
type Org struct {
	ID    int    `json:"id"`
	Login string `json:"login"`
}

Org - GHA Org structure

type Page

// Page - GHA Page structure (element type of Payload.Pages and PayloadOld.Pages).
// Holds the page SHA, the action performed and the page title as found in the JSON.
type Page struct {
	SHA    string `json:"sha"`
	Action string `json:"action"`
	Title  string `json:"title"`
}

Page - GHA Page structure

type Payload

// Payload - GHA Payload structure (used by Event.Payload).
// Every field is a pointer, so any key that is null or absent in the JSON decodes
// to nil; this includes Commits and Pages, which are pointers to slices.
// NOTE(review): presumably which fields are set depends on the event type - confirm.
type Payload struct {
	PushID       *int         `json:"push_id"`
	Size         *int         `json:"size"`
	Ref          *string      `json:"ref"`
	Head         *string      `json:"head"`
	Before       *string      `json:"before"`
	Action       *string      `json:"action"`
	RefType      *string      `json:"ref_type"`
	MasterBranch *string      `json:"master_branch"`
	Description  *string      `json:"description"`
	Number       *int         `json:"number"`
	Forkee       *Forkee      `json:"forkee"`
	Release      *Release     `json:"release"`
	Member       *Actor       `json:"member"`
	Issue        *Issue       `json:"issue"`
	Comment      *Comment     `json:"comment"`
	Commits      *[]Commit    `json:"commits"`
	Pages        *[]Page      `json:"pages"`
	PullRequest  *PullRequest `json:"pull_request"`
}

Payload - GHA Payload structure

type PayloadOld

// PayloadOld - GHA Payload structure (from before 2015), used by EventOld.Payload.
// Every field is a pointer and is nil when the JSON key is null or absent.
// Differences from Payload visible in the declaration:
//   - Issue is an integer here (Payload.Issue is an Issue structure), with a separate IssueID;
//   - CommentID, Commit, SHAs, Repository and Team exist only here;
//   - SHAs is a list of untyped values (interface{}), so its element layout is not
//     described by this declaration.
type PayloadOld struct {
	Issue        *int           `json:"issue"`
	IssueID      *int           `json:"issue_id"`
	Comment      *Comment       `json:"comment"`
	CommentID    *int           `json:"comment_id"`
	Description  *string        `json:"description"`
	MasterBranch *string        `json:"master_branch"`
	Ref          *string        `json:"ref"`
	Action       *string        `json:"action"`
	RefType      *string        `json:"ref_type"`
	Head         *string        `json:"head"`
	Size         *int           `json:"size"`
	Number       *int           `json:"number"`
	PullRequest  *PullRequest   `json:"pull_request"`
	Member       *Actor         `json:"member"`
	Release      *Release       `json:"release"`
	Pages        *[]Page        `json:"pages"`
	Commit       *string        `json:"commit"`
	SHAs         *[]interface{} `json:"shas"`
	Repository   *Forkee        `json:"repository"`
	Team         *Team          `json:"team"`
}

PayloadOld - GHA Payload structure (from before 2015)

type Project

// Project holds project data and list of endpoints.
// It is decoded from YAML: the project name plus its own "endpoints" and
// "historical_endpoints" lists.
type Project struct {
	Name          string        `yaml:"name"`
	RawEndpoints  []RawEndpoint `yaml:"endpoints"`
	HistEndpoints []RawEndpoint `yaml:"historical_endpoints"`
}

Project holds project data and list of endpoints

type PullRequest

// PullRequest - GHA Pull Request structure.
// ID is declared as int64, unlike the int IDs of the other structures here.
// Base and Head are Branch values; User is the Actor stored under "user".
// All pointer fields (including the Assignees and RequestedReviewers slice pointers)
// are nil when the JSON value is null or absent.
type PullRequest struct {
	ID                  int64      `json:"id"`
	Base                Branch     `json:"base"`
	Head                Branch     `json:"head"`
	User                Actor      `json:"user"`
	Number              int        `json:"number"`
	State               string     `json:"state"`
	Locked              *bool      `json:"locked"`
	Title               string     `json:"title"`
	Body                *string    `json:"body"`
	CreatedAt           time.Time  `json:"created_at"`
	UpdatedAt           time.Time  `json:"updated_at"`
	ClosedAt            *time.Time `json:"closed_at"`
	MergedAt            *time.Time `json:"merged_at"`
	MergeCommitSHA      *string    `json:"merge_commit_sha"`
	Assignee            *Actor     `json:"assignee"`
	Assignees           *[]Actor   `json:"assignees"`
	RequestedReviewers  *[]Actor   `json:"requested_reviewers"`
	Milestone           *Milestone `json:"milestone"`
	Merged              *bool      `json:"merged"`
	Mergeable           *bool      `json:"mergeable"`
	MergedBy            *Actor     `json:"merged_by"`
	MergeableState      *string    `json:"mergeable_state"`
	Rebaseable          *bool      `json:"rebaseable"`
	Comments            *int       `json:"comments"`
	ReviewComments      *int       `json:"review_comments"`
	MaintainerCanModify *bool      `json:"maintainer_can_modify"`
	Commits             *int       `json:"commits"`
	Additions           *int       `json:"additions"`
	Deletions           *int       `json:"deletions"`
	ChangedFiles        *int       `json:"changed_files"`
}

PullRequest - GHA Pull Request structure

type RawEndpoint

// RawEndpoint holds data source endpoint with possible flags how to generate the final endpoints.
// Name, Flags, Skip, Only and Project are decoded from YAML.
// SkipREs and OnlyREs are tagged `yaml:"-"`, so they are never read from YAML.
// NOTE(review): presumably they hold the compiled forms of the Skip and Only
// patterns that EndpointIncluded checks - confirm where they are populated.
type RawEndpoint struct {
	Name    string            `yaml:"name"`
	Flags   map[string]string `yaml:"flags"`
	Skip    []string          `yaml:"skip"`
	Only    []string          `yaml:"only"`
	Project string            `yaml:"project"`
	SkipREs []*regexp.Regexp  `yaml:"-"`
	OnlyREs []*regexp.Regexp  `yaml:"-"`
}

RawEndpoint holds data source endpoint with possible flags how to generate the final endpoints. Flags can be "type: github_org/github_user", which means that we need to get the actual repository list from the GitHub org/user.

type Release

// Release - GHA Release structure (used by Payload.Release and PayloadOld.Release).
// Name, PublishedAt and Body are pointers and are nil when the JSON value is null
// or absent. Assets lists the release's Asset entries.
type Release struct {
	ID              int        `json:"id"`
	TagName         string     `json:"tag_name"`
	TargetCommitish string     `json:"target_commitish"`
	Name            *string    `json:"name"`
	Draft           bool       `json:"draft"`
	Author          Actor      `json:"author"`
	Prerelease      bool       `json:"prerelease"`
	CreatedAt       time.Time  `json:"created_at"`
	PublishedAt     *time.Time `json:"published_at"`
	Body            *string    `json:"body"`
	Assets          []Asset    `json:"assets"`
}

Release - GHA Release structure

type Repo

// Repo - GHA Repo structure (used by Event.Repo): just the repository ID and name.
// The full repository description is the separate Forkee structure.
type Repo struct {
	ID   int    `json:"id"`
	Name string `json:"name"`
}

Repo - GHA Repo structure

type Team

// Team - GHA Team structure (only used before 2015, by PayloadOld.Team).
// Holds the team ID, name, slug and permission as found in the JSON.
type Team struct {
	ID         int    `json:"id"`
	Name       string `json:"name"`
	Slug       string `json:"slug"`
	Permission string `json:"permission"`
}

Team - GHA Team structure (only used before 2015)

Directories

Path Synopsis
cmd

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL