Documentation ¶
Index ¶
- Constants
- func Classify(schemaPath string, dataset string, config *IngestTaskConfig) (string, error)
- func Clean(schemaFile string, dataset string, params *IngestParams, ...) (string, error)
- func CloneDataset(sourceDatasetID string, cloneDatasetID string, cloneFolder string, ...) error
- func CopyDiskDataset(existingURI string, newURI string, newDatasetID string, newStorageName string) (*api.DiskDataset, error)
- func CreateComposedVariable(metaStorage api.MetadataStorage, dataStorage api.DataStorage, dataset string, ...) error
- func CreateDataset(dataset string, datasetCtor DatasetConstructor, outputPath string, ...) (string, string, error)
- func CreateDatasetFromResult(newDatasetName string, predictionDataset string, sourceDataset string, ...) (string, error)
- func CreateFeaturizedDatasetID(datasetID string) string
- func DeleteDataset(ds *api.Dataset, metaStorage api.MetadataStorage, dataStorage api.DataStorage, ...) error
- func DeleteQueryCache(datasetID string)
- func ExportDataset(dataset string, metaStorage api.MetadataStorage, dataStorage api.DataStorage, ...) (string, string, error)
- func Featurize(originalSchemaFile string, schemaFile string, data api.DataStorage, ...) error
- func FeaturizeDataset(originalSchemaFile string, schemaFile string, dataset string, ...) (string, string, error)
- func Format(schemaFile string, dataset string, config *IngestTaskConfig) (string, error)
- func GeocodeForwardDataset(schemaFile string, dataset string, config *IngestTaskConfig) (string, error)
- func GetUniqueOutputFolder(dataset string, outputPath string) (string, error)
- func ImportPredictionDataset(params *PredictParams) (string, string, error)
- func Ingest(originalSchemaFile string, schemaFile string, data api.DataStorage, ...) (string, error)
- func IngestMetadata(originalSchemaFile string, schemaFile string, data api.DataStorage, ...) (string, error)
- func IngestPostgres(originalSchemaFile string, schemaFile string, params *IngestParams, ...) error
- func IngestPredictionDataset(params *PredictParams) error
- func JoinDatamart(joinLeft *JoinSpec, joinRight *JoinSpec, rightOrigin *model.DatasetOrigin) (string, *apiModel.FilteredData, error)
- func JoinDistil(dataStorage apiModel.DataStorage, joinLeft *JoinSpec, joinRight *JoinSpec, ...) (string, *apiModel.FilteredData, error)
- func LoadFittedSolution(fittedSolutionURI string, solutionStorage api.SolutionStorage, ...) (string, error)
- func LoadSolution(solutionURI string) (string, error)
- func Merge(schemaFile string, dataset string, config *IngestTaskConfig) (string, error)
- func NewDefaultClient(config env.Config, userAgent string, discoveryLogger middleware.MethodLogger) (*compute.Client, error)
- func Predict(params *PredictParams) (string, error)
- func PrepExistingPredictionDataset(params *PredictParams) (string, string, error)
- func Query(params QueryParams) (map[string]interface{}, error)
- func Rank(schemaPath string, dataset string, config *IngestTaskConfig) (string, error)
- func Sample(originalSchemaFile string, schemaFile string, dataset string, ...) (string, bool, int, error)
- func SaveFittedSolution(fittedSolutionID string, modelName string, modelDescription string, ...) (*api.ExportedModel, error)
- func SaveSolution(solutionID string) (string, error)
- func SetClient(computeClient *compute.Client)
- func SetGroups(datasetID string, rawGroupings []map[string]interface{}, data api.DataStorage, ...) error
- func Summarize(schemaPath string, dataset string, config *IngestTaskConfig) (string, error)
- func TargetRank(dataset *api.Dataset, target string, features []*model.Variable, ...) (map[string]float64, error)
- func UpdateExtremas(dataset string, metaStorage api.MetadataStorage, dataStorage api.DataStorage) error
- func VerifySuggestedTypes(dataset string, dataStorage api.DataStorage, metaStorage api.MetadataStorage) error
- func VerticalConcat(dataStorage apiModel.DataStorage, joinLeft *JoinSpec, joinRight *JoinSpec) (string, *apiModel.FilteredData, error)
- type ClusterPoint
- type DatasetConstructor
- type FeatureRequest
- type GeocodedPoint
- type ImportanceResult
- type IngestParams
- type IngestResult
- type IngestSteps
- type IngestTaskConfig
- type JoinPair
- type JoinSpec
- type OutlierPoint
- type PredictParams
- type PredictionTimeseriesDataset
- type QueryParams
- type SummaryResult
Constants ¶
const (
// OutlierAnomaly is the category name used for anomalies discovered by outlier detection.
OutlierAnomaly = "anomaly"
// OutlierRegular is the category name used for regular values as discovered by outlier detection.
OutlierRegular = "regular"
)
const (
// DefaultSeparator is the default separator to use when dealing with groupings.
DefaultSeparator = "_"
)
Variables ¶
This section is empty.
Functions ¶
func Classify ¶
func Classify(schemaPath string, dataset string, config *IngestTaskConfig) (string, error)
Classify will classify the dataset using a primitive.
func Clean ¶
func Clean(schemaFile string, dataset string, params *IngestParams, config *IngestTaskConfig) (string, error)
Clean will clean bad data for further processing.
func CloneDataset ¶
func CloneDataset(sourceDatasetID string, cloneDatasetID string, cloneFolder string, metaStorage api.MetadataStorage, dataStorage api.DataStorage, filterParams *api.FilterParams) error
CloneDataset clones a dataset in metadata storage, data storage and on disk.
func CopyDiskDataset ¶
func CopyDiskDataset(existingURI string, newURI string, newDatasetID string, newStorageName string) (*api.DiskDataset, error)
CopyDiskDataset copies an existing dataset on disk to a new location, updating the ID and the storage name.
func CreateComposedVariable ¶
func CreateComposedVariable(metaStorage api.MetadataStorage, dataStorage api.DataStorage, dataset string, storageName string, composedVarName string, composedVarDisplayName string, sourceVarNames []string) error
CreateComposedVariable creates a new variable to use as group id.
func CreateDataset ¶
func CreateDataset(dataset string, datasetCtor DatasetConstructor, outputPath string, config *env.Config) (string, string, error)
CreateDataset structures a raw csv file into a valid D3M dataset.
func CreateDatasetFromResult ¶
func CreateDatasetFromResult(newDatasetName string, predictionDataset string, sourceDataset string, features []string, targetName string, resultURI string, datasetDescription string, metaStorage api.MetadataStorage, dataStorage api.DataStorage, config env.Config) (string, error)
CreateDatasetFromResult creates a new dataset based on a result set & the input to the model
func CreateFeaturizedDatasetID ¶
CreateFeaturizedDatasetID creates a dataset id for a learning dataset.
func DeleteDataset ¶
func DeleteDataset(ds *api.Dataset, metaStorage api.MetadataStorage, dataStorage api.DataStorage, softDelete bool) error
DeleteDataset deletes a dataset from metadata and, if not a soft delete, from the database.
func DeleteQueryCache ¶
func DeleteQueryCache(datasetID string)
DeleteQueryCache deletes the query cache folder if it exists.
func ExportDataset ¶
func ExportDataset(dataset string, metaStorage api.MetadataStorage, dataStorage api.DataStorage, filterParams *api.FilterParams) (string, string, error)
ExportDataset extracts a dataset from the database and metadata storage, writing it to disk in D3M dataset format.
func Featurize ¶
func Featurize(originalSchemaFile string, schemaFile string, data api.DataStorage, storage api.MetadataStorage, dataset string, config *IngestTaskConfig) error
Featurize provides a separate step for featurizing data so that it can be called independently of the ingest step.
func FeaturizeDataset ¶
func FeaturizeDataset(originalSchemaFile string, schemaFile string, dataset string, metaStorage api.MetadataStorage, config *IngestTaskConfig) (string, string, error)
FeaturizeDataset creates a featurized output of the data that can be used in simplified pipelines.
func Format ¶
func Format(schemaFile string, dataset string, config *IngestTaskConfig) (string, error)
Format will format a dataset to have the required structures for D3M.
func GeocodeForwardDataset ¶
func GeocodeForwardDataset(schemaFile string, dataset string, config *IngestTaskConfig) (string, error)
GeocodeForwardDataset geocodes fields that are types of locations. The results are appended to the dataset and the whole dataset is output to disk.
func GetUniqueOutputFolder ¶
GetUniqueOutputFolder produces a unique name for a dataset in a folder.
func ImportPredictionDataset ¶
func ImportPredictionDataset(params *PredictParams) (string, string, error)
ImportPredictionDataset imports a dataset to be used for predictions.
func Ingest ¶
func Ingest(originalSchemaFile string, schemaFile string, data api.DataStorage, storage api.MetadataStorage, params *IngestParams, config *IngestTaskConfig, steps *IngestSteps) (string, error)
Ingest the metadata to ES and the data to Postgres.
func IngestMetadata ¶
func IngestMetadata(originalSchemaFile string, schemaFile string, data api.DataStorage, storage api.MetadataStorage, params *IngestParams, config *IngestTaskConfig, steps *IngestSteps) (string, error)
IngestMetadata ingests the data to ES.
func IngestPostgres ¶
func IngestPostgres(originalSchemaFile string, schemaFile string, params *IngestParams, config *IngestTaskConfig, steps *IngestSteps) error
IngestPostgres ingests a dataset to PG storage.
func IngestPredictionDataset ¶
func IngestPredictionDataset(params *PredictParams) error
IngestPredictionDataset ingests a dataset to be used for predictions.
func JoinDatamart ¶
func JoinDatamart(joinLeft *JoinSpec, joinRight *JoinSpec, rightOrigin *model.DatasetOrigin) (string, *apiModel.FilteredData, error)
JoinDatamart will make all your dreams come true.
func JoinDistil ¶
func JoinDistil(dataStorage apiModel.DataStorage, joinLeft *JoinSpec, joinRight *JoinSpec, joinPairs []*JoinPair, joinType string, returnRaw bool) (string, *apiModel.FilteredData, error)
JoinDistil will bring misery.
func LoadFittedSolution ¶
func LoadFittedSolution(fittedSolutionURI string, solutionStorage api.SolutionStorage, metadataStorage api.MetadataStorage) (string, error)
LoadFittedSolution loads a fitted solution via TA2TA3 API.
func LoadSolution ¶
LoadSolution loads an unfitted solution via TA2TA3 API.
func Merge ¶
func Merge(schemaFile string, dataset string, config *IngestTaskConfig) (string, error)
Merge will merge data resources into a single data resource.
func NewDefaultClient ¶
func NewDefaultClient(config env.Config, userAgent string, discoveryLogger middleware.MethodLogger) (*compute.Client, error)
NewDefaultClient creates a new client to use when submitting pipelines.
func Predict ¶
func Predict(params *PredictParams) (string, error)
Predict processes input data to generate predictions.
func PrepExistingPredictionDataset ¶
func PrepExistingPredictionDataset(params *PredictParams) (string, string, error)
PrepExistingPredictionDataset sets up an existing dataset to be usable for predictions.
func Query ¶
func Query(params QueryParams) (map[string]interface{}, error)
Query uses a query pipeline to rank data by nearness to a target.
func Rank ¶
func Rank(schemaPath string, dataset string, config *IngestTaskConfig) (string, error)
Rank will rank the dataset using a primitive.
func Sample ¶
func Sample(originalSchemaFile string, schemaFile string, dataset string, config *IngestTaskConfig) (string, bool, int, error)
Sample takes a sample of the dataset since larger datasets can lead to broken user experience through long lasting TA2 processing.
func SaveFittedSolution ¶
func SaveFittedSolution(fittedSolutionID string, modelName string, modelDescription string, solutionStorage api.SolutionStorage, metadataStorage api.MetadataStorage) (*api.ExportedModel, error)
SaveFittedSolution saves a fitted solution to disk via TA2TA3 API.
func SaveSolution ¶
SaveSolution saves a solution to disk via TA2TA3 API.
func SetGroups ¶
func SetGroups(datasetID string, rawGroupings []map[string]interface{}, data api.DataStorage, meta api.MetadataStorage, config *IngestTaskConfig) error
SetGroups updates the dataset metadata (as stored) to capture group information.
func Summarize ¶
func Summarize(schemaPath string, dataset string, config *IngestTaskConfig) (string, error)
Summarize will summarize the dataset using a primitive.
func TargetRank ¶
func TargetRank(dataset *api.Dataset, target string, features []*model.Variable, source metadata.DatasetSource) (map[string]float64, error)
TargetRank will rank the dataset relative to a target variable using a primitive.
func UpdateExtremas ¶
func UpdateExtremas(dataset string, metaStorage api.MetadataStorage, dataStorage api.DataStorage) error
UpdateExtremas will update every field's extremas in the specified dataset.
func VerifySuggestedTypes ¶
func VerifySuggestedTypes(dataset string, dataStorage api.DataStorage, metaStorage api.MetadataStorage) error
VerifySuggestedTypes expands the suggested types to include all valid types the database storage can support.
func VerticalConcat ¶
func VerticalConcat(dataStorage apiModel.DataStorage, joinLeft *JoinSpec, joinRight *JoinSpec) (string, *apiModel.FilteredData, error)
VerticalConcat will bring mastery.
Types ¶
type ClusterPoint ¶
ClusterPoint contains data that has been clustered.
func Cluster ¶
func Cluster(dataset *api.Dataset, variable string, useKMeans bool, clusterCount int) (bool, []*ClusterPoint, error)
Cluster will cluster the dataset fields using a primitive.
func ClusterExplainOutput ¶
func ClusterExplainOutput(variable string, resultURI string, explainURI string, config *env.Config) (bool, []*ClusterPoint, error)
ClusterExplainOutput clusters the explained output from a model.
type DatasetConstructor ¶
type DatasetConstructor interface { CreateDataset(rootDataPath string, datasetName string, config *env.Config) (*serialization.RawDataset, error) GetDefinitiveTypes() []*model.Variable CleanupTempFiles() }
DatasetConstructor is used to build a dataset.
type FeatureRequest ¶
type FeatureRequest struct { SourceVariableName string FeatureVariableName string OutputVariableName string Variable *model.Variable Step *description.FullySpecifiedPipeline Clustering bool }
FeatureRequest captures the properties of a request to a primitive.
type GeocodedPoint ¶
GeocodedPoint contains data that has been geocoded.
func GeocodeForward ¶
func GeocodeForward(datasetInputDir string, dataset string, variable *model.Variable) ([]*GeocodedPoint, error)
GeocodeForward will geocode a column into lat & lon values.
type ImportanceResult ¶
ImportanceResult is the result from a ranking operation.
type IngestParams ¶
type IngestParams struct { Source metadata.DatasetSource DataCtor api.DataStorageCtor MetaCtor api.MetadataStorageCtor ID string Origins []*model.DatasetOrigin Type api.DatasetType Path string RawGroupings []map[string]interface{} IndexFields []string DefinitiveTypes map[string]*model.Variable }
IngestParams contains the parameters needed to ingest a dataset
func (*IngestParams) GetSchemaDocPath ¶
func (i *IngestParams) GetSchemaDocPath() string
GetSchemaDocPath returns the schema path to use when ingesting.
type IngestResult ¶
IngestResult captures the result of a dataset ingest process.
func IngestDataset ¶
func IngestDataset(params *IngestParams, config *IngestTaskConfig, steps *IngestSteps) (*IngestResult, error)
IngestDataset executes the complete ingest process for the specified dataset.
type IngestSteps ¶
type IngestSteps struct { ClassificationOverwrite bool VerifyMetadata bool FallbackMerged bool CreateMetadataTables bool CheckMatch bool SkipFeaturization bool }
IngestSteps is a collection of parameters that specify ingest behaviour.
type IngestTaskConfig ¶
type IngestTaskConfig struct { DatasetBatchSize int HasHeader bool FeaturizationEnabled bool GeocodingEnabled bool ClassificationOutputPathRelative string ClassificationEnabled bool RankingOutputPathRelative string DatabasePassword string DatabaseUser string Database string DatabaseHost string DatabasePort int DatabaseBatchSize int DatabaseLogLevel string ImputeEnabled bool SummaryOutputPathRelative string SummaryMachineOutputPathRelative string SummaryEnabled bool ESEndpoint string HardFail bool IngestOverwrite bool SampleRowLimit int }
IngestTaskConfig captures the necessary configuration for a data ingest.
func NewConfig ¶
func NewConfig(config env.Config) *IngestTaskConfig
NewConfig creates an ingest config based on a distil config.
type JoinSpec ¶
type JoinSpec struct { DatasetID string DatasetPath string DatasetSource ingestMetadata.DatasetSource ExistingMetadata *model.Metadata UpdatedVariables []*model.Variable }
JoinSpec stores information for one side of a join operation.
type OutlierPoint ¶
OutlierPoint records whether or not a datapoint is an outlier.
func OutlierDetection ¶
func OutlierDetection(dataset *api.Dataset, variable string) ([]*OutlierPoint, error)
OutlierDetection finds outliers in either tabular or remote sensing data
type PredictParams ¶
type PredictParams struct { Meta *model.Metadata LearningDataMeta *model.Metadata SourceDataset *api.Dataset Dataset string SchemaPath string SourceDatasetID string SolutionID string FittedSolutionID string DatasetConstructor DatasetConstructor OutputPath string IndexFields []string Target *model.Variable MetaStorage api.MetadataStorage DataStorage api.DataStorage SolutionStorage api.SolutionStorage ModelStorage api.ExportedModelStorage IngestConfig *IngestTaskConfig Config *env.Config }
PredictParams contains all parameters passed to the predict function.
type PredictionTimeseriesDataset ¶
type PredictionTimeseriesDataset struct {
// contains filtered or unexported fields
}
PredictionTimeseriesDataset has the parameters necessary to create a timeseries dataset from minimal information.
func NewPredictionTimeseriesDataset ¶
func NewPredictionTimeseriesDataset(params *PredictParams, interval float64, count int) (*PredictionTimeseriesDataset, error)
NewPredictionTimeseriesDataset creates prediction timeseries dataset.
func (*PredictionTimeseriesDataset) CleanupTempFiles ¶
func (p *PredictionTimeseriesDataset) CleanupTempFiles()
CleanupTempFiles does nothing.
func (*PredictionTimeseriesDataset) CreateDataset ¶
func (p *PredictionTimeseriesDataset) CreateDataset(rootDataPath string, datasetName string, config *env.Config) (*serialization.RawDataset, error)
CreateDataset creates a raw dataset based on minimum timeseries parameters.
func (*PredictionTimeseriesDataset) GetDefinitiveTypes ¶
func (p *PredictionTimeseriesDataset) GetDefinitiveTypes() []*model.Variable
GetDefinitiveTypes returns an empty list as definitive types.
type QueryParams ¶
type QueryParams struct { Dataset string TargetName string DataStorage api.DataStorage MetaStorage api.MetadataStorage Filters *api.FilterParams }
QueryParams helper struct to simplify query task calling.
type SummaryResult ¶
type SummaryResult struct {
Summary string `json:"summary"`
}
SummaryResult represents a summary result.