syncdatasources

package module
v0.0.0-...-3b7efca
Published: Jul 11, 2022 License: Apache-2.0 Imports: 29 Imported by: 3

Documentation

Index

Constants

const APIToken string = "api-token"

APIToken - constant string

const BackendPassword string = "backend-password"

BackendPassword - backend-password

const BackendUser string = "backend-user"

BackendUser - backend-user

const Bitergia string = "bitergia"

Bitergia - bitergia

const Bugzilla string = "bugzilla"

Bugzilla - bugzilla

const BugzillaRest string = "bugzillarest"

BugzillaRest - bugzillarest (requires Bugzilla 5.X)

const Confluence string = "confluence"

Confluence - confluence

const CopyFromDateField = "metadata__enriched_on" // Date when the item was enriched and stored in the index with enriched documents. (currently best IMHO - LG)

CopyFromDateField - field used to find the most recent document and to start copying from the datetime in that field

const DADS string = "dads"

DADS - config flag in the fixture that allows selecting when to run dads instead of p2o

const DadsException string = "DA_DS_ERROR(time="

DadsException - string that identifies a dads exception

const DadsWarning string = "da-ds WARNING"

DadsWarning - string that identifies a dads warning

const Delete string = "DELETE"

Delete - DELETE

const Discourse string = "discourse"

Discourse - discourse

const DockerHub string = "dockerhub"

DockerHub - dockerhub

const Email string = "email"

Email - email

const External string = "external"

External - external

const FromDate string = "from-date"

FromDate - from-date

const Gerrit string = "gerrit"

Gerrit - gerrit

const Get string = "GET"

Get - GET

const Git string = "git"

Git - git

const GitHub string = "github"

GitHub - github

const GitHubOrg string = "github_org"

GitHubOrg - github_org

const GitHubUser string = "github_user"

GitHubUser - github_user

const GoogleGroups string = "googlegroups"

GoogleGroups data source

const GroupsIO string = "groupsio"

GroupsIO - groupsio

const Head string = "HEAD"

Head - HEAD

const Jenkins string = "jenkins"

Jenkins - jenkins

const Jira string = "jira"

Jira - jira

const Locked string = "locked"

Locked - locked

const MeetUp string = "meetup"

MeetUp - meetup

const Nil string = "<nil>"

Nil - used to specify an empty environment variable in the fixture (for dads)

const Null string = "(null)"

Null - used to specify a null value

const OK string = "ok"

OK - common constant string

const Password string = "password"

Password - password

const Pipermail string = "pipermail"

Pipermail - pipermail

const Post string = "POST"

Post - POST

const ProjectNoOrigin string = "--no-origin--"

ProjectNoOrigin - special marker to set project on all index data

const Put string = "PUT"

Put - PUT

const PyException string = "Traceback (most recent call last)"

PyException - string that identifies a python exception

const Redacted string = "[redacted]"

Redacted - [redacted]

const RocketChat string = "rocketchat"

RocketChat - rocketchat

const SDSMtx string = "sdsmtx"

SDSMtx - sdsmtx

const SSHKey string = "ssh-key"

SSHKey - constant string

const SearchScroll string = "/_search/scroll"

SearchScroll - /_search/scroll

const Slack string = "slack"

Slack - slack

const Unlocked string = "unlocked"

Unlocked - unlocked

const User string = "user"

User - user

const UserID string = "user-id"

UserID - user-id

Variables

var (
	// GRedactedStrings - need to be global, to redact them from error logs
	GRedactedStrings map[string]struct{}
	// GRedactedMtx - guard access to this map while in MT
	GRedactedMtx *sync.RWMutex
)
var ErrorStrings = map[int]string{
	-3: "task was not executed due to frequency check",
	-2: "task is configured as a copy from another index pattern",
	-1: "task was skipped",
	1:  "datasource slug contains > 1 '/' separators",
	2:  "incorrect endpoint value for given data source",
	3:  "incorrect config option(s) for given data source",
	4:  "p2o.py error",
	5:  "setting SSH private key error",
	6:  "command timeout error",
	7:  "index copy error",
}

ErrorStrings - map of possible errors returned from enrich tasks

Functions

func AddRedacted

func AddRedacted(newRedacted string, useMutex bool)

AddRedacted - adds redacted string

func CSVHeader

func CSVHeader() []string

CSVHeader - CSV header fields

func DayStart

func DayStart(dt time.Time) time.Time

DayStart - return time rounded to current day start

func EndpointIncluded

func EndpointIncluded(ctx *Ctx, ep *RawEndpoint, origin string) (bool, int)

EndpointIncluded - checks if a given endpoint's origin should be included or excluded based on the endpoint's skip/only regular expression lists. The first return value specifies whether the endpoint is included; the second specifies why: 1 - included by 'only' condition, 2 - skipped by 'skip' condition.
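
A hedged usage sketch (not from the package docs, and assuming the check consults the pre-compiled SkipREs/OnlyREs fields of RawEndpoint; imports of fmt and regexp assumed):

var ctx Ctx
ctx.Init()
ep := &RawEndpoint{
	Name:    "https://gerrit.onap.org",
	OnlyREs: []*regexp.Regexp{regexp.MustCompile(`^https://gerrit\.onap\.org`)},
	SkipREs: []*regexp.Regexp{regexp.MustCompile(`-retired$`)},
}
included, reason := EndpointIncluded(&ctx, ep, "https://gerrit.onap.org/aai")
// reason == 1 means an 'only' regexp matched; reason == 2 means a 'skip' regexp fired
fmt.Println(included, reason)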

func EnsureIndex

func EnsureIndex(ctx *Ctx, index string, init bool)

EnsureIndex - ensure that the given index exists in ES. init: when this flag is set, do not use syncdatasources.Printf, which would cause infinite recursion

func EsLog

func EsLog(ctx *Ctx, msg string, dt time.Time) error

EsLog - log data into ES "sdslog" index

func ExecCommand

func ExecCommand(ctx *Ctx, cmdAndArgs []string, env map[string]string, tmout *time.Duration) (string, error)

ExecCommand - execute a command given as an array of strings, with an optional environment map
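
A hedged call sketch; per the Ctx field docs below, ExecOutput must be set to capture STDOUT and ExecFatal cleared to get an error back instead of os.Exit:

var ctx Ctx
ctx.Init()
ctx.ExecOutput = true // capture the command's STDOUT
ctx.ExecFatal = false // return an error instead of exiting on failure
tmout := 30 * time.Second
env := map[string]string{"GIT_TERMINAL_PROMPT": "0"} // illustrative env, not required
out, err := ExecCommand(&ctx, []string{"git", "ls-remote", "https://github.com/git/git"}, env, &tmout)
fmt.Println(out, err)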

func FatalNoLog

func FatalNoLog(err error) string

FatalNoLog displays an error message (if an error is present) and exits the program; should be used in the very early init stage

func FatalOnError

func FatalOnError(err error) string

FatalOnError displays error message (if error present) and exits program

func Fatalf

func Fatalf(f string, a ...interface{})

Fatalf - it will call FatalOnError using fmt.Errorf with args provided

func FilterRedacted

func FilterRedacted(str string) string

FilterRedacted - filter out all known redacted strings
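
A minimal sketch of the redaction flow, assuming FilterRedacted substitutes registered strings with the Redacted ("[redacted]") constant:

AddRedacted("s3cr3t-token", true) // true: guard the global map with GRedactedMtx
msg := FilterRedacted("calling API with token s3cr3t-token")
// msg presumably reads: "calling API with token [redacted]"
fmt.Println(msg)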

func GHClient

func GHClient(ctx *Ctx) (ghCtx context.Context, clients []*github.Client)

GHClient - get GitHub client

func GHClientForKeys

func GHClientForKeys(oAuths map[string]struct{}) (ghCtx context.Context, clients []*github.Client)

GHClientForKeys - get GitHub client for given keys

func GetAPIToken

func GetAPIToken() (string, error)

GetAPIToken - return an API token to use for dev-analytics-api API calls. If the JWT_TOKEN env is specified, just use the provided token without any checks; else get auth0 data from AUTH0_DATA and generate/reuse a token stored in the ES cache.

func GetDockerHubRepos

func GetDockerHubRepos(ctx *Ctx, dockerhubOwner string) (repos []string, err error)

GetDockerHubRepos - return list of repos for given dockerhub server

func GetFixtures

func GetFixtures(ctx *Ctx, path string) (fixtures []string)

GetFixtures - read all fixture files

func GetGerritRepos

func GetGerritRepos(ctx *Ctx, gerritURL string) (projects, repos []string, err error)

GetGerritRepos - return list of repos for given gerrit server (uses HTML crawler)

func GetRateLimits

func GetRateLimits(gctx context.Context, ctx *Ctx, gcs []*github.Client, core bool) (int, []int, []int, []time.Duration)

GetRateLimits - returns all and remaining API points and the duration to wait for reset. When core=true it returns Core limits; when core=false it returns Search limits.

func GetRedacted

func GetRedacted() (str string)

GetRedacted - get redacted

func GetRocketChatChannels

func GetRocketChatChannels(ctx *Ctx, srv, token, uid string) (channels []string, err error)

GetRocketChatChannels - return list of channels defined on a given RocketChat server

func GetSlackBotUsersConversation

func GetSlackBotUsersConversation(ctx *Ctx, token string) (ids, channels []string, err error)

GetSlackBotUsersConversation - return list of channels (Slack users.conversations API) available for a given slack bot user (specified by a bearer token)

func GetThreadsNum

func GetThreadsNum(ctx *Ctx) int

GetThreadsNum returns the number of available CPUs. If the environment variable SDS_ST is set it returns 1; this can be used to debug the single-threaded version.

func GroupIncluded

func GroupIncluded(ctx *Ctx, gc *GroupConfig, origin string) bool

GroupIncluded - checks if a given endpoint's origin matches a given group configuration. The return value specifies whether the endpoint is included or not.

func Hash

func Hash(str string, nodeIdx, nodeNum int) (int, bool)

Hash - for a given string 'str', calculate a hash value and then transform it into a [0, nodeNum) number. If the result matches nodeIdx then the hash is correct for this node, otherwise it isn't.
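
A sharding sketch based on the description above: with SDS_NODE_NUM=3 nodes, the node with SDS_NODE_IDX=1 only runs tasks whose key hashes to bucket 1 (the task key format here is illustrative):

bucket, mine := Hash("sds-lfn-onap-git:https://github.com/onap/aai-common", 1, 3)
if mine {
	fmt.Println("this node owns the task, bucket:", bucket)
}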

func HourStart

func HourStart(dt time.Time) time.Time

HourStart - return time rounded to current hour start

func InitializeAuth0

func InitializeAuth0() error

InitializeAuth0 - initializes Auth0 client using data stored in AUTH0_DATA

func IsRedacted

func IsRedacted(name string) bool

IsRedacted - returns whatever "name" config option should be redacted or not

func MonthStart

func MonthStart(dt time.Time) time.Time

MonthStart - return time rounded to current month start

func NextDayStart

func NextDayStart(dt time.Time) time.Time

NextDayStart - return time rounded to next day start
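
For example, the Day* helpers make it easy to build full-day sync windows around a timestamp:

now := time.Now()
today := DayStart(now)         // today 00:00
yesterday := PrevDayStart(now) // yesterday 00:00
tomorrow := NextDayStart(now)  // tomorrow 00:00
fmt.Println(yesterday, today, tomorrow) // yesterday..today is the previous full day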

func NextHourStart

func NextHourStart(dt time.Time) time.Time

NextHourStart - return time rounded to next hour start

func NextMonthStart

func NextMonthStart(dt time.Time) time.Time

NextMonthStart - return time rounded to next month start

func NextQuarterStart

func NextQuarterStart(dt time.Time) time.Time

NextQuarterStart - return time rounded to next quarter start

func NextWeekStart

func NextWeekStart(dt time.Time) time.Time

NextWeekStart - return time rounded to next week start

func NextYearStart

func NextYearStart(dt time.Time) time.Time

NextYearStart - return time rounded to next year start

func PeriodParse

func PeriodParse(perStr string) (dur time.Duration, ok bool)

PeriodParse - tries to parse period

func PrevDayStart

func PrevDayStart(dt time.Time) time.Time

PrevDayStart - return time rounded to prev day start

func PrevHourStart

func PrevHourStart(dt time.Time) time.Time

PrevHourStart - return time rounded to prev hour start

func PrevMonthStart

func PrevMonthStart(dt time.Time) time.Time

PrevMonthStart - return time rounded to prev month start

func PrevQuarterStart

func PrevQuarterStart(dt time.Time) time.Time

PrevQuarterStart - return time rounded to prev quarter start

func PrevWeekStart

func PrevWeekStart(dt time.Time) time.Time

PrevWeekStart - return time rounded to prev week start

func PrevYearStart

func PrevYearStart(dt time.Time) time.Time

PrevYearStart - return time rounded to prev year start

func PrintLogf

func PrintLogf(format string, args ...interface{}) (err error)

PrintLogf is a wrapper around Printf(...) that supports logging.

func Printf

func Printf(format string, args ...interface{}) (n int, err error)

Printf is a wrapper around fmt.Printf(...) that supports logging.

func PrintfRedacted

func PrintfRedacted(format string, args ...interface{}) (n int, err error)

PrintfRedacted is a wrapper around fmt.Printf(...) that supports logging.

func ProgressInfo

func ProgressInfo(i, n int, start time.Time, last *time.Time, period time.Duration, msg string)

ProgressInfo - displays info about progress: i/n if current time >= last + period. If info was displayed, update last.
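
A typical loop sketch (tasks and process are hypothetical): progress is printed at most once per period, and last is updated whenever something was printed:

start := time.Now()
last := start
n := len(tasks)
for i := range tasks {
	process(tasks[i]) // hypothetical work item
	ProgressInfo(i, n, start, &last, 10*time.Second, "enriching")
}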

func QuarterStart

func QuarterStart(dt time.Time) time.Time

QuarterStart - return time rounded to current quarter start

func SafeString

func SafeString(str string) string

SafeString - return a safe string without control characters and with correct unicode. Another option would be to replace non-OK characters with "%HH" (their hexcode); ES would understand this.

func StringToBool

func StringToBool(v string) bool

StringToBool - convert a string value to a boolean value. Returns false for anything that parses as false, zero, or empty, e.g.: f, F, false, False, fALSe, 0, "", 0.00; else returns true.
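
A minimal sketch of the documented semantics (not the actual implementation; strconv and strings stdlib packages assumed):

func stringToBool(v string) bool {
	v = strings.TrimSpace(v)
	if v == "" {
		return false
	}
	if b, err := strconv.ParseBool(v); err == nil {
		return b // handles 0/1, t/f, true/false, TRUE/FALSE
	}
	if strings.EqualFold(v, "false") || strings.EqualFold(v, "f") {
		return false // mixed case such as fALSe
	}
	if f, err := strconv.ParseFloat(v, 64); err == nil {
		return f != 0 // "0.00" parses to zero, hence false
	}
	return true
}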

func TimeParseAny

func TimeParseAny(dtStr string) time.Time

TimeParseAny - attempts to parse a time from the string YYYY-MM-DD HH:MI:SS, skipping parts from the right until only YYYY is left
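
A sketch of the documented fallback parsing, inside a hypothetical timeParseAny(dtStr string) time.Time; the real function's error handling may differ:

layouts := []string{
	"2006-01-02 15:04:05",
	"2006-01-02 15:04",
	"2006-01-02 15",
	"2006-01-02",
	"2006-01",
	"2006",
}
for _, l := range layouts {
	if t, err := time.Parse(l, dtStr); err == nil {
		return t
	}
}
// presumably fatal here when nothing matched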

func ToYMDDate

func ToYMDDate(dt time.Time) string

ToYMDDate - return time formatted as YYYY-MM-DD

func ToYMDHMSDate

func ToYMDHMSDate(dt time.Time) string

ToYMDHMSDate - return time formatted as YYYY-MM-DD HH:MI:SS

func WeekStart

func WeekStart(dt time.Time) time.Time

WeekStart - return time rounded to current week start. Assumes the first week day is Sunday.

func YearStart

func YearStart(dt time.Time) time.Time

YearStart - return time rounded to current year start

Types

type Alias

type Alias struct {
	From     string      `yaml:"from"`
	To       []string    `yaml:"to"`
	Dedup    []string    `yaml:"dedup"`
	Views    []AliasView `yaml:"views"`
	NoEnrich bool        `yaml:"no_enrich"`
}

Alias contains index alias data: a single source index (from) and a list of aliases that should point to that index

type AliasView

type AliasView struct {
	Name   string      `yaml:"name"`
	Filter interface{} `yaml:"filter"`
}

AliasView - allows creating "filtered aliases"/"views". API: POST /_aliases '{"actions":[{"add":{"index":"sds-lfn-onap-git-for-merge","alias":"test-lg","filter":{"term":{"project":"CLI"}}}}]}'
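
The same filtered alias expressed with the package's YAML-backed structs (how SDS turns Views into _aliases actions is internal; this only shows the shape):

a := Alias{
	From: "sds-lfn-onap-git-for-merge",
	To:   []string{"test-lg"},
	Views: []AliasView{{
		Name:   "test-lg",
		Filter: map[string]interface{}{"term": map[string]interface{}{"project": "CLI"}},
	}},
}
fmt.Printf("%+v\n", a)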

type ColumnCondition

type ColumnCondition struct {
	Column string `yaml:"column"`
	Value  string `yaml:"value"`
}

ColumnCondition - holds a single must or must_not condition for setting project within a single endpoint

type Config

type Config struct {
	Name  string            `yaml:"name"`
	Value string            `yaml:"value"`
	Flags map[string]string `yaml:"flags"`
}

Config holds data source config options

func (Config) RedactedString

func (c Config) RedactedString() string

RedactedString - redacted string output

func (Config) String

func (c Config) String() string

String - default string output for a config

type CopyConfig

type CopyConfig struct {
	Pattern     string `yaml:"pattern"`
	Incremental bool   `yaml:"incremental"`
	// if set, data will be copied since the most recent data already copied, so you can use no_origin to specify how to copy data
	// if not set, every copy operation will overwrite all data in destination index (which is the default)
	NoOrigin bool `yaml:"no_origin"` // skip checking origin when calculating start date to copy
	// if no_origin is set, then copying will start from the date of the last document stored in the destination index
	//    (can be used when the source has multiple origins or origin(s) different than endpoint's origin)
	// if no_origin is not set it will query destination index for origin of the destination endpoint
	//    and will start copying source -> dest from that date (this is the default)
	Must    []ColumnCondition `yaml:"must"`
	MustNot []ColumnCondition `yaml:"must_not"`
}

CopyConfig - holds data related to copy from other index configuration

type Ctx

type Ctx struct {
	Debug                           int            // From SDS_DEBUG Debug level: 0-no, 1-info, 2-verbose
	CmdDebug                        int            // From SDS_CMDDEBUG Commands execution Debug level: 0-no, 1-only output commands, 2-output commands and their output, 3-output full environment as well, default 0
	MaxRetry                        int            // From SDS_MAXRETRY Try to run grimoire stack (perceval, p2o.py etc) that many times before reporting failure, default 0 (1 original - always runs and 0 more attempts).
	ST                              bool           // From SDS_ST true: use single threaded version, false: use multi threaded version, default false
	NCPUs                           int            // From SDS_NCPUS, set to override number of CPUs to run, this overwrites SDS_ST, default 0 (which means do not use it, use all CPU reported by go library)
	NCPUsScale                      float64        // From SDS_NCPUS_SCALE, scale number of CPUs, for example 2.0 will report 2.0 times the number of actually available CPUs
	FixturesRE                      *regexp.Regexp // From SDS_FIXTURES_RE - you can set regular expression specifying which fixtures should be processed, default empty which means all.
	DatasourcesRE                   *regexp.Regexp // From SDS_DATASOURCES_RE - you can set regular expression specifying which datasources should be processed, default empty which means all.
	ProjectsRE                      *regexp.Regexp // From SDS_PROJECTS_RE - you can set regular expression specifying which projects/subprojects should be processed, default empty which means all.
	EndpointsRE                     *regexp.Regexp // From SDS_ENDPOINTS_RE - you can set regular expression specifying which endpoints/origins should be processed, default empty which means all.
	TasksRE                         *regexp.Regexp // From SDS_TASKS_RE - you can set regular expression specifying which tasks should be processed, default empty which means all, example task is "sds-lfn-onap-slack:SLACK_CHAN_ID"
	FixturesSkipRE                  *regexp.Regexp // From SDS_FIXTURES_SKIP_RE - you can set regular expression specifying which fixtures should be skipped, default empty which means none.
	DatasourcesSkipRE               *regexp.Regexp // From SDS_DATASOURCES_SKIP_RE - you can set regular expression specifying which datasources should be skipped, default empty which means none.
	ProjectsSkipRE                  *regexp.Regexp // From SDS_PROJECTS_SKIP_RE - you can set regular expression specifying which projects/subprojects should be skipped, default empty which means none.
	EndpointsSkipRE                 *regexp.Regexp // From SDS_ENDPOINTS_SKIP_RE - you can set regular expression specifying which endpoints/origins should be skipped, default empty which means none.
	TasksSkipRE                     *regexp.Regexp // From SDS_TASKS_SKIP_RE - you can set regular expression specifying which tasks should be skipped, default empty which means none.
	TasksExtraSkipRE                *regexp.Regexp // From SDS_TASKS_EXTRA_SKIP_RE - you can set regular expression specifying which tasks should be skipped, default empty which means none.
	CtxOut                          bool           // From SDS_CTXOUT output all context data (this struct), default false
	LogTime                         bool           // From SDS_SKIPTIME, output time with all lib.Printf(...) calls, default true, use SDS_SKIPTIME to disable
	ExecFatal                       bool           // default true, set this manually to false to avoid lib.ExecCommand calling os.Exit() on failure and return error instead
	ExecQuiet                       bool           // default false, set this manually to true to have quiet exec failures
	ExecOutput                      bool           // default false, set to true to capture commands STDOUT
	ExecOutputStderr                bool           // default false, set to true to capture commands STDERR
	ElasticURL                      string         // From SDS_ES_URL, ElasticSearch URL, default http://127.0.0.1:9200
	EsBulkSize                      int            // From SDS_ES_BULKSIZE, ElasticSearch bulk size when enriching data, defaults to 0 which means "not specified" (10000)
	NodeHash                        bool           // From SDS_NODE_HASH, if set it will generate hashes for each task and only execute them when node number matches hash result
	NodeNum                         int            // From SDS_NODE_NUM, set number of nodes, so hashing function will return [0, ... n)
	NodeIdx                         int            // From SDS_NODE_IDX, set number of current node, so only hashes matching this node will run
	NodeSettleTime                  int            // From SDS_NODE_SETTLE_TIME, number of seconds that master gives nodes to start-up and wait for ES mutex(es) to sync with master node, default 10 (in seconds)
	DryRun                          bool           // From SDS_DRY_RUN, if set it will do everything excluding actual grimoire stack execution (will report success for all commands instead)
	DryRunCode                      int            // From SDS_DRY_RUN_CODE, dry run exit code, default 0 which means success, possible values 1, 2, 3, 4
	DryRunCodeRandom                bool           // From SDS_DRY_RUN_CODE_RANDOM, dry run exit code, will return random value from 0 to 5
	DryRunSeconds                   int            // From SDS_DRY_RUN_SECONDS, simulate each dry run command taking some time to execute
	DryRunSecondsRandom             bool           // From SDS_DRY_RUN_SECONDS_RANDOM, make running time from 0 to SDS_DRY_RUN_SECONDS (in ms resolution)
	DryRunAllowSSH                  bool           // From SDS_DRY_RUN_ALLOW_SSH, if set it will allow setting SSH keys in dry run mode
	DryRunAllowFreq                 bool           // From SDS_DRY_RUN_ALLOW_FREQ, if set it will allow processing sync frequency data in dry run mode
	DryRunAllowMtx                  bool           // From SDS_DRY_RUN_ALLOW_MTX, if set it will allow handling ES mutexes (for nodes concurrency support) in dry run mode
	DryRunAllowRename               bool           // From SDS_DRY_RUN_ALLOW_RENAME, if set it will allow handling ES index renaming in dry run mode
	DryRunAllowOrigins              bool           // From SDS_DRY_RUN_ALLOW_ORIGINS, if set it will allow fetching external indices origins list in dry run mode
	DryRunAllowDedup                bool           // From SDS_DRY_RUN_ALLOW_DEDUP, if set it will allow dedup bitergia data by deleting origins shared with existing SDS indices
	DryRunAllowFAliases             bool           // From SDS_DRY_RUN_ALLOW_F_ALIASES, if set it will allow creating/maintaining foundation-f aliases in dry run mode
	DryRunAllowProject              bool           // From SDS_DRY_RUN_ALLOW_PROJECT, if set it will allow running set project by SDS (on endpoints with project set and p2o mode set to false)
	DryRunAllowSyncInfo             bool           // From SDS_DRY_RUN_ALLOW_SYNC_INFO, if set it will allow setting sync info in sds-sync-info index
	DryRunAllowSortDuration         bool           // From SDS_DRY_RUN_ALLOW_SORT_DURATION, if set it will allow sorting tasks by last run duration in dry run mode
	DryRunAllowMerge                bool           // From SDS_DRY_RUN_ALLOW_MERGE, if set it will allow calling DA-affiliation merge_all API after all tasks finished in dry run mode
	DryRunAllowHideEmails           bool           // From SDS_DRY_RUN_ALLOW_HIDE_EMAILS, if set it will allow calling DA-affiliation hide_emails API in dry run mode
	DryRunAllowCacheTopContributors bool           // From SDS_DRY_RUN_ALLOW_CACHE_TOP_CONTRIBUTORS, if set it will allow calling DA-affiliation cache_top_contributors API in dry run mode
	DryRunAllowOrgMap               bool           // From SDS_DRY_RUN_ALLOW_ORG_MAP, if set it will allow calling DA-affiliation map_org_names API in dry run mode
	DryRunAllowEnrichDS             bool           // From SDS_DRY_RUN_ALLOW_ENRICH_DS, if set it will allow calling DA-metrics enrich API in dry run mode
	DryRunAllowDetAffRange          bool           // From SDS_DRY_RUN_ALLOW_DET_AFF_RANGE, if set it will allow calling DA-affiliation det_aff_range API in dry run mode
	DryRunAllowCopyFrom             bool           // From SDS_DRY_RUN_ALLOW_COPY_FROM, if set it will allow copy index in dry run mode
	DryRunAllowMetadata             bool           // From SDS_DRY_RUN_ALLOW_METADATA, if set it will allow processing fixture metadata in dry run mode
	TimeoutSeconds                  int            // From SDS_TIMEOUT_SECONDS, set entire program execution timeout, program will finish with return code 2 if anything still runs after this time, default 47 h 45 min = 258660
	TaskTimeoutSeconds              int            // From SDS_TASK_TIMEOUT_SECONDS, set single p2o.py task execution timeout, default is 86400s (24 hours)
	NLongest                        int            // From SDS_N_LONGEST, number of longest running tasks to display in stats, default 30
	SkipSH                          bool           // From SDS_SKIP_SH, if set, Sorting Hat database processing will be skipped
	SkipData                        bool           // From SDS_SKIP_DATA, if set - it will not run incremental data sync
	SkipAffs                        bool           // From SDS_SKIP_AFFS, if set - it will not run p2o.py historical affiliations enrichment (--only-enrich --refresh-identities --no_incremental)
	SkipAliases                     bool           // From SDS_SKIP_ALIASES, if set - sds will not attempt to create index aliases and will not attempt to drop unused aliases
	SkipDropUnused                  bool           // From SDS_SKIP_DROP_UNUSED, if set - it will not attempt to drop unused indexes and aliases
	NoIndexDrop                     bool           // From SDS_NO_INDEX_DROP, if set - it will warning about index drop needed instead of actual index drop
	SkipCheckFreq                   bool           // From SDS_SKIP_CHECK_FREQ, will skip maximum task sync frequency if set
	SkipEsData                      bool           // From SDS_SKIP_ES_DATA, will totally skip anything related to "sdsdata" index processing (storing SDS state)
	SkipEsLog                       bool           // From SDS_SKIP_ES_LOG, will skip writing logs to "sdslog" index
	SkipDedup                       bool           // From SDS_SKIP_DEDUP, will skip attempting to dedup data shared on existing SDS index and external bitergia index (by deleting shared origin data from the external Bitergia index)
	SkipFAliases                    bool           // From SDS_SKIP_F_ALIASES, will skip attempting to create/maintain foundation-f aliases
	SkipExternal                    bool           // From SDS_SKIP_EXTERNAL, will skip any external indices processing: enrichments, deduplication, affiliations etc.
	SkipProject                     bool           // From SDS_SKIP_PROJECT, will skip adding column "project": "project name" on all documents where origin = endpoint name, will also add timestamp column "project_ts", so next run can start on documents newer than that
	SkipProjectTS                   bool           // From SDS_SKIP_PROJECT_TS, will add project column as described above, without using "project_ts" column to determine from which document to start
	SkipSyncInfo                    bool           // From SDS_SKIP_SYNC_INFO, will skip adding sync info to sds-sync-info index
	SkipValGitHubAPI                bool           // From SDS_SKIP_VALIDATE_GITHUB_API, will not process GitHub orgs/users in validate step (will not attempt to get org's/user's repo lists)
	SkipSortDuration                bool           // From SDS_SKIP_SORT_DURATION, if set - it will skip tasks run order by last running time duration desc
	SkipMerge                       bool           // From SDS_SKIP_MERGE, if set - it will skip calling DA-affiliation merge_all API after all tasks finished
	SkipHideEmails                  bool           // From SDS_SKIP_HIDE_EMAILS, if set - it will skip calling DA-affiliation hide_emails API
	SkipMetadata                    bool           // From SDS_SKIP_METADATA, if set - it will skip processing fixture metadata
	SkipCacheTopContributors        bool           // From SDS_SKIP_CACHE_TOP_CONTRIBUTORS, if set - it will skip calling DA-affiliation cache_top_contributors API
	SkipOrgMap                      bool           // From SDS_SKIP_ORG_MAP, if set - it will skip calling DA-affiliation map_org_name API
	SkipEnrichDS                    bool           // From SDS_SKIP_ENRICH_DS, if set - it will skip calling DA-metrics enrich API
	SkipCopyFrom                    bool           // From SDS_SKIP_COPY_FROM, if set - it will skip copying index feature
	RunDetAffRange                  bool           // From SDS_RUN_DET_AFF_RANGE, if set - it will call DA-affiliation det_aff_range API (this is a very resource intensive API)
	SkipP2O                         bool           // From SDS_SKIP_P2O, if set - it will skip all p2o tasks and execute everything else
	StripErrorSize                  int            // From SDS_STRIP_ERROR_SIZE, default 16384, error messages longer than this value will be stripped to this value from the beginning and from the end, so for 16384 an error 64000 bytes long will become 16384 bytes from the beginning \n(...)\n 16384 from the end
	GitHubOAuth                     string         // From SDS_GITHUB_OAUTH, if not set it attempts to use public access, if contains "/" it will assume that it contains file name, if "," found then it will assume that this is a list of OAuth tokens instead of just one
	LatestItems                     bool           // From SDS_LATEST_ITEMS, if set pass "latest items" or similar flag to the p2o.py backend (that should be handled by p2o.py using ES, so this is probably not a good idea; the git backend, for example, can return no data then)
	CSVPrefix                       string         // From SDS_CSV_PREFIX, CSV logs filename prefix, default "jobs", so files would be "/root/.perceval/jobs_I_N.csv"
	Silent                          bool           // From SDS_SILENT, skip p2o.py debug mode if set, else it will pass "-g" flag to 'p2o.py' call
	NoMultiAliases                  bool           // From SDS_NO_MULTI_ALIASES, if set alias can only be defined for a single index, so only one index maps to any alias; if not defined multiple input indices can be accessed through a single alias (so it can have data from more than 1 p2o.py call)
	CleanupAliases                  bool           // From SDS_CLEANUP_ALIASES, will delete all aliases before creating them (so it can delete old indexes that were pointed by given alias before adding new indexes to it (single or multiple))
	ScrollWait                      int            // From SDS_SCROLL_WAIT, will pass 'p2o.py' '--scroll-wait=N' if set - this is to specify time to wait for available scrolls (in seconds), default 2700 (45 minutes)
	ScrollSize                      int            // From SDS_SCROLL_SIZE, ElasticSearch scroll size when enriching data, default 500
	MaxDeleteTrials                 int            // From SDS_MAX_DELETE_TRIALS, default 10
	MaxMtxWait                      int            // From SDS_MAX_MTX_WAIT, in seconds, default 900s
	MaxMtxWaitFatal                 bool           // From SDS_MAX_MTX_WAIT_FATAL, exit with error when waiting for mutex is more than configured amount of time
	EnrichExternalFreq              time.Duration  // From SDS_ENRICH_EXTERNAL_FREQ, how often enrich external indexes, default is 168h (7 days, week) which means no more often than 168h.
	OnlyValidate                    bool           // From SDS_ONLY_VALIDATE, if defined, SDS will only validate fixtures and exit 0 if all of them are valid, non-zero + error message otherwise
	OnlyP2O                         bool           // From SDS_ONLY_P2O, if defined, SDS will only run p2o tasks, will not do anything else.
	SkipReenrich                    string         // From SDS_SKIP_REENRICH, list of backend types where re-enrich phase is not needed, because they always fetch full data (don't support incremental updates), probably we can specify "jira,gerrit,confluence,bugzilla"
	AffiliationAPIURL               string         // From AFFILIATION_API_URL - DA affiliations API url
	Auth0Data                       string         // From AUTH0_DATA - auth0 data for da-ds (can be different than SDS auth0 data) - it's a stringified JSON
	MetricsAPIURL                   string         // From METRICS_API_URL - DA metrics API url
	Auth0URL                        string         // From AUTH0_URL: Auth0 parameters for obtaining DA-affiliation API token
	Auth0Audience                   string         // From AUTH0_AUDIENCE
	Auth0ClientID                   string         // From AUTH0_CLIENT_ID
	Auth0ClientSecret               string         // From AUTH0_CLIENT_SECRET
	Auth0GrantType                  string         // From AUTH0_GRANT_TYPE
	ShUser                          string         // From SH_USER: Sorting Hat database parameters
	ShHost                          string         // From SH_HOST
	ShPort                          string         // From SH_PORT
	ShPass                          string         // From SH_PASS
	ShDB                            string         // From SH_DB
	TestMode                        bool           // True when running tests
	OAuthKeys                       []string       // GitHub oauth keys received from SDS_GITHUB_OAUTH configuration (initialized only when lib.GHClient() is called)
	DynamicOAuth                    bool           // From SDS_DYNAMIC_OAUTH - instead of getting OAuth keys once, get them dynamically every time they're passed to subcommand da-ds/p2o.py
	GapURL                          string         // Data gap handler API URL
	Retries                         string         // number of retries to insert into elastic
	Delay                           string         // duration between each retry
	Environment                     string         // From ENVIRONMENT
	AwsDefaultRegion                string         // From AWS_DEFAULT_REGION
	AwsAccessKeyID                  string         // From AWS_ACCESS_KEY_ID
	AwsSecretAccessKey              string         // From AWS_SECRET_ACCESS_KEY
	LeFromAddr                      string         // FROM LE_FROMADDR
	LePassword                      string         // FROM LE_PASSWORD
	LeToAddrs                       string         // FROM LE_TOADDRS
}

Ctx - environment context packed in structure

func (*Ctx) Init

func (ctx *Ctx) Init()

Init - get context from environment variables

func (*Ctx) Print

func (ctx *Ctx) Print()

Print context contents

type DataSource

type DataSource struct {
	Slug          string        `yaml:"slug"`
	Config        []Config      `yaml:"config"`
	MaxFrequency  string        `yaml:"max_frequency"`
	Projects      []Project     `yaml:"projects"`
	RawEndpoints  []RawEndpoint `yaml:"endpoints"`
	HistEndpoints []RawEndpoint `yaml:"historical_endpoints"`
	IndexSuffix   string        `yaml:"index_suffix"`
	Endpoints     []Endpoint    `yaml:"-"`
	MaxFreq       time.Duration `yaml:"-"`
	FullSlug      string        `yaml:"-"`
	Settings      *interface{}  `yaml:"settings"`
}

DataSource contains data source spec from dev-analytics-api

func (DataSource) Configs

func (ds DataSource) Configs() string

Configs - return redacted configs as a string

func (DataSource) String

func (ds DataSource) String() string

type DockerHubData

type DockerHubData struct {
	Count   int                `json:"count"`
	Next    string             `json:"next"`
	Results []DockerHubResults `json:"results"`
}

DockerHubData - docker hub response format

type DockerHubResults

type DockerHubResults struct {
	User string `json:"user"`
	Name string `json:"name"`
}

DockerHubResults - holds user data

type Endpoint

type Endpoint struct {
	Name       string // Endpoint name
	Project    string // optional project (allows grouping endpoints), for example "Project value"
	ProjectP2O bool   // if true SDS will pass `--project "Project value"` to p2o.py
	// if false, SDS will post-process index and will add `"project": "Project value"`
	// column where `"origin": "Endpoint name"`
	ProjectNoOrigin   bool
	Timeout           time.Duration // specifies maximum running time for a given endpoint (if specified)
	CopyFrom          CopyConfig    // specifies optional 'copy_from' configuration
	AffiliationSource string
	Projects          []EndpointProject
	PairProgramming   bool
	Dummy             bool // used to mark that there is endpoint, but nothing should be done for it
	Groups            []GroupConfig
}

Endpoint holds data source endpoint (final endpoint generated from RawEndpoint)

type EndpointProject

type EndpointProject struct {
	Name    string            `yaml:"name"`
	Origin  string            `yaml:"origin"`
	Must    []ColumnCondition `yaml:"must"`
	MustNot []ColumnCondition `yaml:"must_not"`
}

EndpointProject - holds data for a single sub-endpoint project configuration

type EsAlias

type EsAlias struct {
	Alias string `json:"alias"`
	Index string `json:"index"`
}

EsAlias - keeps alias data as returned by ElasticSearch

type EsBulkItemStatus

type EsBulkItemStatus struct {
	Status int         `json:"status"`
	Error  interface{} `json:"error"`
}

EsBulkItemStatus - status

type EsBulkResult

type EsBulkResult struct {
	Items []EsBulkResultItem `json:"items"`
}

EsBulkResult - item statuses

type EsBulkResultItem

type EsBulkResultItem struct {
	Index EsBulkItemStatus `json:"index"`
}

EsBulkResultItem - index status

type EsByQueryPayload

type EsByQueryPayload struct {
	Updated int64 `json:"updated"`
	Deleted int64 `json:"deleted"`
}

EsByQueryPayload - update/delete by query result payload

type EsIndex

type EsIndex struct {
	Index string `json:"index"`
}

EsIndex - keeps index data as returned by ElasticSearch

type EsIndexSettings

type EsIndexSettings struct {
	IndexBlocksWrite *bool `json:"index.blocks.write"`
}

EsIndexSettings - index settings

type EsIndexSettingsPayload

type EsIndexSettingsPayload struct {
	Settings EsIndexSettings `json:"settings"`
}

EsIndexSettingsPayload - index settings payload

type EsLastRunPayload

type EsLastRunPayload struct {
	Index    string    `json:"index"`
	Endpoint string    `json:"endpoint"`
	Type     string    `json:"type"`
	Dt       time.Time `json:"dt"`
}

EsLastRunPayload - last run support

type EsLogPayload

type EsLogPayload struct {
	Msg string    `json:"msg"`
	Dt  time.Time `json:"dt"`
}

EsLogPayload - ES log single document

type EsMtxPayload

type EsMtxPayload struct {
	Mtx string    `json:"mtx"`
	Dt  time.Time `json:"dt"`
}

EsMtxPayload - ES mutex support (for locking concurrent nodes)

type EsSearchPayload

type EsSearchPayload struct {
	Query EsSearchQuery `json:"query"`
}

EsSearchPayload - ES search payload

type EsSearchQuery

type EsSearchQuery struct {
	QueryString EsSearchQueryString `json:"query_string"`
}

EsSearchQuery - ES search query

type EsSearchQueryString

type EsSearchQueryString struct {
	Query string `json:"query"`
}

EsSearchQueryString - ES search query string
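
These three structs nest into a standard ES query_string payload; marshaling with encoding/json yields the body a _search endpoint expects:

payload := EsSearchPayload{
	Query: EsSearchQuery{
		QueryString: EsSearchQueryString{Query: `endpoint:"https://gerrit.onap.org"`},
	},
}
body, _ := json.Marshal(payload)
// body: {"query":{"query_string":{"query":"endpoint:\"https://gerrit.onap.org\""}}}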

type EsSearchResultHit

type EsSearchResultHit struct {
	Source EsSearchResultSource `json:"_source"`
	ID     string               `json:"_id"`
}

EsSearchResultHit - search result single hit

type EsSearchResultHits

type EsSearchResultHits struct {
	Hits []EsSearchResultHit `json:"hits"`
}

EsSearchResultHits - search result hits

type EsSearchResultPayload

type EsSearchResultPayload struct {
	Hits         EsSearchResultHits `json:"hits"`
	Aggregations interface{}        `json:"aggregations"`
}

EsSearchResultPayload - search result payload

type EsSearchResultSource

type EsSearchResultSource struct {
	Index                string    `json:"index"`
	Endpoint             string    `json:"endpoint"`
	Type                 string    `json:"type"`
	Mtx                  string    `json:"mtx"`
	Dt                   time.Time `json:"dt"`
	ProjectTS            int64     `json:"project_ts"`
	MDTimestamp          time.Time `json:"metadata__timestamp"`
	MDEnrichedOn         time.Time `json:"metadata__enriched_on"`
	MDUpdatedOn          time.Time `json:"metadata__updated_on"`
	GrimoireCreationDate time.Time `json:"grimoire_creation_date"`
}

EsSearchResultSource - search result single hit's source document

type EsSearchScrollPayload

type EsSearchScrollPayload struct {
	ScrollID string `json:"_scroll_id"`
}

EsSearchScrollPayload - search scroll result payload

type EsSyncInfoPayload

type EsSyncInfoPayload struct {
	Index             string     `json:"index"`
	Endpoint          string     `json:"endpoint"`
	Dt                time.Time  `json:"dt"`
	DataSyncAttemptDt *time.Time `json:"data_sync_attempt_dt"`
	DataSyncSuccessDt *time.Time `json:"data_sync_success_dt"`
	DataSyncErrorDt   *time.Time `json:"data_sync_error_dt"`
	DataSyncError     *string    `json:"data_sync_error"`
	DataSyncCL        *string    `json:"data_sync_command_line"`
	DataSyncRCL       *string    `json:"data_sync_redacted_command_line"`
	EnrichAttemptDt   *time.Time `json:"enrich_attempt_dt"`
	EnrichSuccessDt   *time.Time `json:"enrich_success_dt"`
	EnrichErrorDt     *time.Time `json:"enrich_error_dt"`
	EnrichError       *string    `json:"enrich_error"`
	EnrichCL          *string    `json:"enrich_command_line"`
	EnrichRCL         *string    `json:"enrich_redacted_command_line"`
}

EsSyncInfoPayload - sync info support

type EsUpdateByQueryPayload

type EsUpdateByQueryPayload struct {
	Updated int64 `json:"updated"`
}

EsUpdateByQueryPayload - update by query result payload

type Fixture

type Fixture struct {
	Disabled    bool         `yaml:"disabled"`
	AllowEmpty  bool         `yaml:"allow_empty"`
	Native      Native       `yaml:"native"`
	DataSources []DataSource `yaml:"data_sources"`
	Aliases     []Alias      `yaml:"aliases"`
	Metadata    Metadata     `yaml:"metadata"`
	Fn          string
	Slug        string
}

Fixture contains full YAML structure of dev-analytics-api fixture files
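
A minimal fixture in the YAML shape implied by the struct tags, decoded here with gopkg.in/yaml.v2 (which decoder SDS itself uses is an assumption):

var fixture Fixture
data := []byte(`
native:
  slug: lfn/onap
data_sources:
  - slug: git
    max_frequency: 6h
    endpoints:
      - name: https://github.com/onap/aai-common
`)
if err := yaml.Unmarshal(data, &fixture); err == nil {
	fmt.Println(fixture.Native.Slug, fixture.DataSources[0].RawEndpoints[0].Name)
}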

type GroupConfig

type GroupConfig struct {
	Name    string           `yaml:"name"`
	Skip    []string         `yaml:"skip"`
	Only    []string         `yaml:"only"`
	Self    bool             `yaml:"self"`    // If true, then group name = endpoint origin will be added
	Default bool             `yaml:"default"` // If set - this group will be used when no other groups match
	SkipREs []*regexp.Regexp `yaml:"-"`
	OnlyREs []*regexp.Regexp `yaml:"-"`
}

GroupConfig - holds repo group configuration (name + skip/only REGEXPs)

type MetaDataSource

type MetaDataSource struct {
	Name      string   `yaml:"name"`      // can be git, github/pull_request etc
	Slugs     []string `yaml:"slugs"`     // list of indices like 'finos/open-developer-platform/jira-for-merge', can start with 'pattern:', 'pattern:sds-finos-*-git-for-merge'
	Externals []string `yaml:"externals"` // external indices, for example 'bitergia-git-dump'
}

MetaDataSource - information about indices configured for a given data source (metadata section)

type MetaWorkingGroup

type MetaWorkingGroup struct {
	Name        string            `yaml:"name"`         // will map to "workinggroup" ES document field
	Meta        map[string]string `yaml:"meta"`         // values from this map (key/value) will map to ES "meta_key" = "value"
	NoOverwrite bool              `yaml:"no_overwrite"` // only set workinggroup and meta_* fields if they're not present yet
	DataSources []WGDataSource    `yaml:"datasources"`  // condition where to apply metadata (origins and filters)
}

MetaWorkingGroup - information about working groups configured in a fixture (metadata section). To actually apply the config, a MetaDataSource must be found for the WGDataSource and the Meta map must have at least one element. If the meta map is empty, only the "workinggroup" value will be set.

type Metadata

type Metadata struct {
	DataSources   []MetaDataSource   `yaml:"datasources"`
	WorkingGroups []MetaWorkingGroup `yaml:"workinggroups"`
}

Metadata - keeps special data settings, currently this is used by FINOS

type MultiConfig

type MultiConfig struct {
	Name          string
	Value         []string
	RedactedValue []string
}

MultiConfig holds massaged config options, it can have >1 value for single option, for example GitHub API tokens: -t token1 token2 token3 ... tokenN

func (MultiConfig) String

func (mc MultiConfig) String() string

type Native

type Native struct {
	Slug              string `yaml:"slug"`
	AffiliationSource string `yaml:"affiliation_source"`
}

Native - keeps fixture slug and eventual global affiliation source

type Project

type Project struct {
	Name     string `yaml:"name"`
	P2O      *bool  `yaml:"p2o"`
	NoOrigin *bool  `yaml:"no_origin"` // if set, it will set project on a given index without conditional origin
	// so it should be used only to set a single project within an entire datasource
	// possibly after copy_from usage
	RawEndpoints  []RawEndpoint `yaml:"endpoints"`
	HistEndpoints []RawEndpoint `yaml:"historical_endpoints"`
}

Project holds project data and list of endpoints

type RawEndpoint

type RawEndpoint struct {
	Name              string            `yaml:"name"`
	Flags             map[string]string `yaml:"flags"`
	Skip              []string          `yaml:"skip"`
	Only              []string          `yaml:"only"`
	Project           string            `yaml:"project"`
	ProjectP2O        *bool             `yaml:"p2o"`
	ProjectNoOrigin   *bool             `yaml:"no_origin"`
	Timeout           *string           `yaml:"timeout"`
	Projects          []EndpointProject `yaml:"endpoint_projects"`
	CopyFrom          CopyConfig        `yaml:"copy_from"`
	AffiliationSource string            `yaml:"affiliation_source"`
	PairProgramming   bool              `yaml:"pair_programming"`
	Groups            []GroupConfig     `yaml:"groups"`
	SkipREs           []*regexp.Regexp  `yaml:"-"`
	OnlyREs           []*regexp.Regexp  `yaml:"-"`
}

RawEndpoint holds a data source endpoint with possible flags for how to generate the final endpoints. Flags can be "type: github_org/github_user", which means that we need to get the actual repository list from the github org/user.

type Task

type Task struct {
	Endpoint            string
	Config              []Config
	DsSlug              string
	FxSlug              string
	FxFn                string
	MaxFreq             time.Duration
	CommandLine         string
	RedactedCommandLine string
	Env                 map[string]string
	Retries             int
	Err                 error
	Duration            time.Duration
	DsFullSlug          string
	ExternalIndex       string
	Project             string
	ProjectP2O          bool
	ProjectNoOrigin     bool
	Projects            []EndpointProject
	Millis              int64
	Timeout             time.Duration
	CopyFrom            CopyConfig
	PairProgramming     bool
	AffiliationSource   string
	Groups              []string
	Dummy               bool
	Flags               map[string]string
}

Task holds single endpoint task and its context (required config, fixture filename etc.)

func (Task) ShortString

func (t Task) ShortString() string

ShortString - output quick endpoint info (usually used for non-finished tasks)

func (Task) ShortStringCmd

func (t Task) ShortStringCmd(ctx *Ctx) string

ShortStringCmd - output quick endpoint info (with command line)

func (Task) String

func (t Task) String() string

String - default string output for a task (generic)

func (Task) ToCSV

func (t Task) ToCSV() []string

ToCSV - outputs array of string for CSV output of this task

func (Task) ToCSVNotRedacted

func (t Task) ToCSVNotRedacted() []string

ToCSVNotRedacted - outputs array of string for CSV output of this task (without redacting sensitive data)

type TaskMtx

type TaskMtx struct {
	SSHKeyMtx    *sync.Mutex
	TaskOrderMtx *sync.Mutex
	SyncInfoMtx  *sync.Mutex
	SyncFreqMtx  *sync.RWMutex
	OrderMtx     map[int]*sync.Mutex
}

TaskMtx - holds all mutexes used in task processing

type TaskResult

type TaskResult struct {
	Code                [2]int
	CommandLine         string
	RedactedCommandLine string
	Env                 map[string]string
	Retries             int
	Affs                bool
	Err                 error
	Index               string
	Endpoint            string
	Ds                  string
	Fx                  string
	Projects            []EndpointProject
}

TaskResult is a return type from task execution. It contains the task index Code[0], the error code Code[1], and the task's final command line.

type WGDataSource

type WGDataSource struct {
	Name    string                 `yaml:"name"`    // must match name from MetaDataSource to find indices/patterns to apply to
	Origins []string               `yaml:"origins"` // List of origins to apply metadata to
	Filter  map[string]interface{} `yaml:"filter"`  // Eventual filter definition - to apply metadata to (in addition to origins)
}

WGDataSource - contains origins and eventually filter(s) to specify where to apply metadata

Directories

Path Synopsis
cmd
