runner

package
v0.0.0-...-8739376 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 28, 2020 License: MIT Imports: 24 Imported by: 0

Documentation

Index

Constants

View Source
// ARCHIVE_EXT is the string "zip".
// NOTE(review): presumably the file extension for archives produced by
// Archive, whose doc note says packaging uses zip — confirm against the
// implementation.
const ARCHIVE_EXT = "zip"
View Source
// DEFAULT_IMAGE is an image reference (registry/repository:tag) with the
// "latest" tag.
// NOTE(review): presumably the container image used when a job does not
// specify one — confirm against the implementation.
const DEFAULT_IMAGE = "registry.cn-hangzhou.aliyuncs.com/tradingai/tbase:latest"
View Source
// UPLOAD_CONTENT_TYPE is the MIME type string for zip data.
// NOTE(review): presumably sent as the content type when uploading archives
// — confirm against the upload code.
const UPLOAD_CONTENT_TYPE = "application/zip"

Variables

This section is empty.

Functions

func Archive

func Archive(srcDir string) (destPath string, err error)

NOTE(wen): Archives are packed using zip compression.

func CreateTestEvalJob

func CreateTestEvalJob() (job *pb.Job)

func CreateTestInferJob

func CreateTestInferJob() (job *pb.Job)

func CreateTestTrainJob

func CreateTestTrainJob() (job *pb.Job)

func DeleteDirectory

func DeleteDirectory(dir string) (err error)

func DeleteFile

func DeleteFile(filePath string) (err error)

func GetMemory

func GetMemory() (totalMemory, availableMemeory int64, err error)

GetMemory returns the total memory and the available memory.

func GetPhysicalCPUNum

func GetPhysicalCPUNum() (n int32, err error)

GetPhysicalCPUNum returns the number of CPUs of the machine.

func Unarchive

func Unarchive(srcPath, destDir string) (err error)

NOTE(wen): When unarchiving, the file format is determined from the extension of the file name.

Types

type Conf

// Conf holds the configuration values for a Runner: New takes a *Conf and
// Runner keeps one in its Conf field. It is produced by LoadConf and has a
// Validate method.
//
// NOTE(review): the meaning of the individual fields is not visible in this
// listing; the notes below are inferred from field names and types only —
// confirm against LoadConf and Validate.
type Conf struct {
	StorageDir       string
	Minio            minio.MinioConf // settings of type minio.MinioConf (Runner also holds a *minio.Client)
	HeartbeatSeconds int             // presumably the heartbeat interval in seconds — TODO confirm
	TushareToken     string
	DataRootDir      string
	// The *Dir fields below look like per-artifact directories (job logs,
	// job shells, models, progress bars, tensorboard, infer and eval
	// output) — TODO confirm.
	JobLogDir        string
	JobShellDir      string
	ModelDir         string
	ProgressBarDir   string
	TensorboardDir   string
	InferDir         string
	EvalDir          string
	// SchedulerPort is a string, not an integer.
	SchedulerHost    string
	SchedulerPort    string
	Token            string
}

func LoadConf

func LoadConf() (conf *Conf, err error)

LoadConf loads the configuration from the environment.

func (*Conf) Validate

func (c *Conf) Validate() (err error)

type Container

// Container is the value type of Runner.Containers, where entries are keyed
// by job ID. It pairs container identifiers with a job and a plugin.
//
// NOTE(review): how Name, ID and ShortID are derived, and what the Plugin is
// used for, is not visible in this listing — confirm against the code that
// fills Runner.Containers.
type Container struct {
	Name    string
	ID      string
	ShortID string
	Job     *pb.Job
	Plugin  plugins.Plugin
}

type Machine

// Machine holds GPU, CPU and memory figures. It is created by NewMachine and
// has Update, UpdateCPUUtilization and UpdateMemory methods; Runner keeps a
// *Machine in its Machine field.
//
// NOTE(review): units of the memory fields and the scale of the utilization
// fields (fraction vs. percent) are not visible in this listing — confirm
// against the Update* implementations.
type Machine struct {
	GPUNum             int32
	GPUsIndex          []int32 // presumably the indices of the GPUs counted in GPUNum — TODO confirm
	GPUMemory          int64
	GPUUtilization     float64
	AvailableGPUMemory int64
	CPUNum             int32
	CPUUtilization     float64
	Memory             int64
	AvailableMemory    int64
	// contains filtered or unexported fields
}

func NewMachine

func NewMachine() (m *Machine, err error)

func (*Machine) Update

func (m *Machine) Update() (err error)

func (*Machine) UpdateCPUUtilization

func (m *Machine) UpdateCPUUtilization() (err error)

func (*Machine) UpdateMemory

func (m *Machine) UpdateMemory() (err error)

type Runner

// Runner is the central type of this package. It is built from a *Conf by
// New and carries the methods that create, run, stop and finish jobs, send
// heartbeats, and delete per-job artifacts.
//
// NOTE(review): Containers is a plain map; whether access to it is guarded
// by one of the unexported fields is not visible in this listing — confirm
// before using a Runner from multiple goroutines.
type Runner struct {
	Conf       *Conf
	Minio      *minio.Client
	ID         string
	Containers map[uint64]Container // key: jobID, value: Container
	Machine    *Machine

	Status pb.RunnerStatus
	// contains filtered or unexported fields
}

func New

func New(conf *Conf) (r *Runner, err error)

func (*Runner) Clean

func (r *Runner) Clean(id string)

func (*Runner) CreateJob

func (r *Runner) CreateJob(job *pb.Job) (err error)

func (*Runner) DeleteEval

func (r *Runner) DeleteEval(id string) (err error)

func (*Runner) DeleteInfer

func (r *Runner) DeleteInfer(id string) (err error)

func (*Runner) DeleteLog

func (r *Runner) DeleteLog(id string) (err error)

func (*Runner) DeleteModel

func (r *Runner) DeleteModel(id string) (err error)

func (*Runner) DeleteProgressBar

func (r *Runner) DeleteProgressBar(id string) (err error)

func (*Runner) DeleteShell

func (r *Runner) DeleteShell(id string) (err error)

func (*Runner) DeleteTensorboard

func (r *Runner) DeleteTensorboard(id string) (err error)

func (*Runner) Destory

func (r *Runner) Destory() (err error)

func (*Runner) FinishedJob

func (r *Runner) FinishedJob(job *pb.Job) (err error)

FinishedJob uploads files (model, tensorboard), updates job.output, and cleans up directories and files: model dir, job log, tensorboard dir, evals, infers, progress_bars.

func (*Runner) Free

func (r *Runner) Free()

func (*Runner) Heartbeat

func (r *Runner) Heartbeat() (err error)

func (*Runner) RemoveContainer

func (r *Runner) RemoveContainer(job *pb.Job) (err error)

func (*Runner) RunJob

func (r *Runner) RunJob(job *pb.Job) (err error)

func (*Runner) RunJobs

func (r *Runner) RunJobs(jobs []*pb.Job)

NOTE(wen): An error in one job does not stop the runner from continuing, so no error is returned.

func (*Runner) StartOrDie

func (r *Runner) StartOrDie() (err error)

func (*Runner) StopJob

func (r *Runner) StopJob(job *pb.Job) (err error)

func (*Runner) UpdateBar

func (r *Runner) UpdateBar(job *pb.Job, p plugins.Plugin) (err error)

func (*Runner) UpdateEvalOutput

func (r *Runner) UpdateEvalOutput(job *pb.Job, p plugins.Plugin) (err error)

func (*Runner) UpdateInferOutput

func (r *Runner) UpdateInferOutput(job *pb.Job, p plugins.Plugin) (err error)

func (*Runner) UpdateOutput

func (r *Runner) UpdateOutput(job *pb.Job) (err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL