Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BulkIndex ¶
// BulkIndex holds the settings for a Glean bulk index operation. The struct
// tags give the YAML key used for each field.
type BulkIndex struct {
ForceDeletion bool `yaml:"force_deletion"` // Glean's force deletion flag
ForceRestart bool `yaml:"force_restart"` // Glean's force restart flag
ReaddirEntries int `yaml:"readdir_entries"` // number of entries per Readdir call.
DocumentRequestSize int `yaml:"document_request_size"` // number of documents to include in a single bulk index request.
UserRequestSize int `yaml:"user_request_size"` // number of users to include in a single bulk index request.
CacheConcurrency int `yaml:"cache_concurrency"` // number of concurrent cache reads.
}
BulkIndex represents the configuration info for a Glean bulk index operation.
type Conversion ¶
// Conversion groups the settings for a single content conversion operation:
// a content type, a converter and a Glean custom datasource configuration.
type Conversion struct {
// NOTE(review): presumably the content type this conversion applies to;
// confirm against Datasource.ConfigForContentType, which keys its result
// by content.Type.
Type content.Type
// Converter settings for this conversion. NOTE(review): whether this may
// be nil is not visible here.
Converter *Converter
// Glean custom datasource configuration associated with this conversion.
Datasource *gleansdk.CustomDatasourceConfig
}
Conversion represents the configuration for a single content conversion operation.
type Converter ¶
// Converter describes the ability to convert from a set of content types to
// a Glean document ("glean/document"). The struct tags give the YAML key for
// each field.
type Converter struct {
FromContentType []content.Type `yaml:"from_content_types,flow"` // Content types that this converter can handle.
ViewURLRewrites []string `yaml:"view_url_rewrites"` // Rewrite rules for viewurls specified as textutil.RewriteRules
AllowAnonymousAccess bool `yaml:"allow_anonymous_access"` // allow anonymous access to the converted documents.
// Default author to use if none can be obtained from the document itself.
DefaultAuthor User `yaml:"default_author"`
// Raw YAML found under the "custom" key, kept as an undecoded yaml.Node.
// NOTE(review): presumably converter-specific settings that are decoded
// later by the converter itself; confirm against the code that uses it.
CustomConfig yaml.Node `yaml:"custom"`
}
Converter represents the ability to convert from a set of content types to a Glean document ("glean/document").
type Crawl ¶
// Crawl describes a single crawl that contributes data to a datasource.
// Both fields are tagged `yaml:",inline"`, so their keys appear directly in
// the YAML mapping for the crawl rather than under a nested key.
type Crawl struct {
// Embedded crawl configuration.
crawlcmd.Config `yaml:",inline" cmd:"crawl configuration"`
// Service to be crawled (service_name and service_config keys).
Service CrawlService `yaml:",inline" cmd:"service to be crawled"`
}
Crawl represents a single crawl that contributes data to a datasource.
type CrawlService ¶
// CrawlService identifies a service to be crawled and carries the
// configuration specific to that service.
type CrawlService struct {
// Name of the service to crawl, e.g. s3/aws.
Name string `yaml:"service_name" cmd:"name of service to crawl, eg. s3/aws"`
// Service specific configuration kept as an undecoded yaml.Node, e.g. the
// YAML form of cloudeng.io/aws/awsconfig.AWSFlags.
Config yaml.Node `yaml:"service_config" cmd:"service specific configuration, eg. cloudeng.io/aws/awsconfig.AWSFlags"`
}
CrawlService represents the configuration of a specific service to be crawled, eg. to contain configuration for accessing a cloud service.
type Datasource ¶
// Datasource describes a single datasource or corpus to be crawled and
// indexed. The struct tags give the YAML key for each field.
type Datasource struct {
// Datasource name.
Datasource string `yaml:"datasource" cmd:"name of the datasource"`
// File based crawls to run for this datasource.
Crawls []Crawl `yaml:"crawls,omitempty" cmd:"file based crawls to run for this datasource"`
// API based 'crawls' that obtain data for this datasource.
APICrawls apicrawlcmd.Crawls `yaml:"api_crawls,omitempty" cmd:"api crawls to run for this datasource"`
// Bulk index configuration for this datasource. Embedded as a pointer and
// stored under the bulk_index key.
*BulkIndex `yaml:"bulk_index,omitempty" cmd:"bulk index configuration for this datasource"`
// Incremental index configuration for this datasource. Embedded as a
// pointer and stored under the incremental_index key.
*IncrementalIndex `yaml:"incremental_index,omitempty" cmd:"incremental index configuration for this datasource"`
// Converters (from download.Result to Glean document) configuration.
Converters []Converter `yaml:"converters,omitempty" cmd:"converters for this datasource"`
// GleanDomain is the domain of the Glean instance to use.
GleanDomain string `yaml:"glean_domain" cmd:"glean domain to use"`
// GleanTokenName is the name of the glean token to use to access the
// Glean instance; if empty the glean_domain will be used.
GleanTokenName string `yaml:"glean_token_name" cmd:"glean token name to use, if empty the glean_domain will be used"`
// The Glean datasource configuration in YAML as opposed to JSON
// format.
GleanDatasource GleanDatasource `yaml:"glean_datasource_config" cmd:"glean datasource configuration, ie. the glean datasource to be indexed"`
}
Datasource represents a single datasource or corpus to be crawled and indexed.
func DatasourceForName ¶
DatasourceForName returns the datasource configuration for the named datasource read from the specified config file.
func (Datasource) ConfigForContentType ¶
func (d Datasource) ConfigForContentType() map[content.Type]Conversion
ConfigForContentType returns a map from content type to all of the configuration information that pertains to that content type.
type DatasourceName ¶
// DatasourceName holds the name of a datasource. The `subcmd` tag declares
// a "datasource" entry described as "name of the datasource".
// NOTE(review): presumably used as a command line flag, like FileFlags;
// confirm against the command definitions.
type DatasourceName struct {
Datasource string `subcmd:"datasource,,name of the datasource"`
}
type Datasources ¶
// Datasources is a list of Datasource configurations.
type Datasources []Datasource
Datasources represents a list of named datasources.
func (Datasources) ConfigForName ¶
func (d Datasources) ConfigForName(name string) (Datasource, bool)
ConfigForName returns the configuration for the named datasource.
type FileFlags ¶
// FileFlags holds the command line flag for the datasource config file.
type FileFlags struct {
// ConfigFile is the datasource config file; the `subcmd` tag names the
// flag "datasource-configs".
ConfigFile string `subcmd:"datasource-configs,,datasource config file"`
}
FileFlags represents a command line flag for the datasource config file.
type GleanDatasource ¶
// GleanDatasource wraps the configuration of the datasource used with
// Glean's API.
type GleanDatasource struct {
// The embedded gleansdk.CustomDatasourceConfig is the datasource
// configuration for the Glean instance; it is tagged `yaml:",inline"`, so
// its keys appear directly in the enclosing YAML mapping.
gleansdk.CustomDatasourceConfig `yaml:",inline" cmd:"glean custom datasource configuration"`
}
GleanDatasource represents the configuration of the datasource with Glean's API.
type IncrementalIndex ¶
// IncrementalIndex holds the settings for incremental, document at a time,
// indexing.
type IncrementalIndex struct {
DeletionDelay time.Duration `yaml:"deletion_delay"` // Documents that have not been updated within the deletion delay will be removed from the Glean index.
}
IncrementalIndex represents the configuration info for incremental, document at a time, indexing.