model

package
v0.0.0-...-6b52c20 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 18, 2017 License: Apache-2.0 Imports: 10 Imported by: 9

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CreateTask

func CreateTask(task *Task) error

func DeleteTask

func DeleteTask(id string) error

func IncrementDomainLinkCount

func IncrementDomainLinkCount(host string) error

func UpdateTask

func UpdateTask(task *Task)

Types

type Domain

// Domain is the record kept for a single host: its link count, favicon,
// optional settings and creation/update timestamps.
//
// Host carries the storm "id,unique" tag and the gorm
// "not null;unique;primary_key" tag, so it serves as the record key under
// either mapping. Settings is tagged for inline storage by storm, and
// CreateTime/UpdateTime are storm-indexed. Every field uses omitempty, so
// zero values and nil pointers are left out of the JSON encoding.
type Domain struct {
	Host       string         `storm:"id,unique" json:"host,omitempty" gorm:"not null;unique;primary_key"`
	LinksCount int64          `json:"links_count,omitempty"`
	Favicon    string         `json:"favicon,omitempty"`
	Settings   *DomainSetting `storm:"inline" json:"settings,omitempty"`
	CreateTime *time.Time     `storm:"index" json:"created,omitempty"`
	UpdateTime *time.Time     `storm:"index" json:"updated,omitempty"`
}

func CreateDomain

func CreateDomain(host string) Domain

func GetDomain

func GetDomain(domain string) (Domain, error)

func GetDomainList

func GetDomainList(from, size int, domain string) (int, []Domain, error)

type DomainSetting

// DomainSetting is the type of the optional Domain.Settings field.
// It currently declares no fields.
type DomainSetting struct {
}

type IndexDocument

// IndexDocument wraps an arbitrary document body (Source) together with the
// index, type and id it is addressed by.
//
// The JSON keys are "_index", "_type", "_id" and "_source"; all are omitted
// when empty.
// NOTE(review): these keys look like the Elasticsearch document envelope —
// confirm against the code that produces or consumes this type.
type IndexDocument struct {
	Index  string                 `json:"_index,omitempty"`
	Type   string                 `json:"_type,omitempty"`
	Id     string                 `json:"_id,omitempty"`
	Source map[string]interface{} `json:"_source,omitempty"`
}

type KV

// KV pairs a string key with a list of string values.
//
// Value is tagged for inline storage by storm. Both fields are omitted from
// the JSON encoding when empty.
type KV struct {
	Key   string   `json:"key,omitempty"`
	Value []string `storm:"inline" json:"value,omitempty"`
}

type LinkGroup

// LinkGroup splits a set of page links into an Internal and an External list.
// Either list is omitted from the JSON encoding when it is empty.
// NOTE(review): presumably "internal" means links to the same site as the
// page and "external" means links to other sites — confirm with the code that
// fills this struct.
type LinkGroup struct {
	Internal []PageLink `json:"internal,omitempty"`
	External []PageLink `json:"external,omitempty"`
}
// PageLink is a single link: its URL and its label text.
//
// The JSON tags carry no omitempty option, so both "url" and "label" are
// always emitted, even when empty.
type PageLink struct {
	Url   string `json:"url"`
	Label string `json:"label"`
}

type Seed

// Seed describes a URL to be processed together with the URL it was found on
// and its depth/breadth counters.
//
// Url, Depth and Breadth are storm-indexed. All fields are omitted from the
// JSON encoding when they hold their zero value.
type Seed struct {
	// Url may be uncleaned and may lack the domain part; Reference is needed
	// to resolve it into a complete URL.
	// The gorm tag declares the column type first and the NOT NULL constraint
	// second ("type:varchar(500);not null"); with the two transposed, gorm
	// would read "not null" as the column type.
	Url       string `storm:"index" json:"url,omitempty" gorm:"type:varchar(500);not null"`
	Reference string `json:"reference_url,omitempty"`
	Depth     int    `storm:"index" json:"depth,omitempty"`
	Breadth   int    `storm:"index" json:"breadth,omitempty"`
}

func NewTaskSeed

func NewTaskSeed(url, ref string, depth int, breadth int) Seed

func TaskSeedFromBytes

func TaskSeedFromBytes(b []byte) Seed

func (Seed) Get

func (this Seed) Get(url string) Seed

func (Seed) GetBytes

func (this Seed) GetBytes() ([]byte, error)

func (Seed) MustGetBytes

func (this Seed) MustGetBytes() []byte

type Snapshot

// Snapshot holds one stored version of a page: where it is kept, the raw
// payload, and the text features extracted from it.
//
// Fields tagged `json:"-"` (StatusCode, Payload, Headers, Metadata,
// Parameters) are never written to JSON; every other field is omitted from
// JSON when empty.
type Snapshot struct {
	// Identity and storage location. ID is the gorm primary key.
	ID      string `json:"id,omitempty" gorm:"not null;unique;primary_key"`
	Version int    `json:"version,omitempty"`
	Path    string `json:"path,omitempty"` // path of this file
	File    string `json:"file,omitempty"` // filename of this page

	// Raw response data. NOTE(review): presumably the HTTP status code and
	// body of the fetch, with Size being the payload length — confirm.
	StatusCode int    `json:"-"`
	Payload    []byte `json:"-"`
	Size       uint64 `json:"size,omitempty"`

	// Auxiliary data kept alongside the payload; excluded from JSON.
	Headers    map[string][]string     `json:"-"`
	Metadata   *map[string]interface{} `json:"-"`
	Parameters []KV                    `json:"-"`

	// Language is serialized under the JSON key "lang".
	Language string `json:"lang,omitempty"`

	// Textual content of the page.
	Title       string `json:"title,omitempty"`
	Summary     string `json:"summary,omitempty"`
	Text        string `json:"text,omitempty"`
	ContentType string `json:"content_type,omitempty"`

	Tags []string `json:"tags,omitempty"`

	// Links found on the page, split into internal and external.
	Links LinkGroup `json:"links,omitempty"`

	// Images uses an anonymous struct with the same fields and tags as LinkGroup.
	Images struct {
		Internal []PageLink `json:"internal,omitempty"`
		External []PageLink `json:"external,omitempty"`
	} `json:"images,omitempty"`

	// Text collected per markup element (headings h1-h5, bold, italic).
	H1     []string `json:"h1,omitempty"`
	H2     []string `json:"h2,omitempty"`
	H3     []string `json:"h3,omitempty"`
	H4     []string `json:"h4,omitempty"`
	H5     []string `json:"h5,omitempty"`
	Bold   []string `json:"bold,omitempty"`
	Italic []string `json:"italic,omitempty"`

	Classifications  []string                `json:"classifications,omitempty"`
	EnrichedFeatures *map[string]interface{} `json:"enriched_features,omitempty"`

	// Content fingerprints. NOTE(review): the hashing algorithms are not
	// visible here — check where these fields are populated.
	Hash    string `json:"hash,omitempty"`
	SimHash string `json:"sim_hash,omitempty"`

	CreateTime *time.Time `json:"created,omitempty"`
}

type Task

// Task is a unit of work built around an embedded Seed, with an ID, a
// pipeline phrase, a status, timestamps and a reference to the most recent
// snapshot.
//
// ID is the gorm primary key. Host, Phrase, Status and all time fields are
// gorm-indexed. Fields tagged `json:"-"` (Host, Message, the fetch/check
// times and every Snapshot* field) are not written to JSON. "id", "phrase"
// and "status" have no omitempty option and are always emitted.
type Task struct {
	// Seed is embedded, so its fields (Url, Reference, Depth, Breadth) are
	// promoted onto Task.
	Seed
	ID            string          `gorm:"not null;unique;primary_key" json:"id"`
	Host          string          `gorm:"index" json:"-"`
	Schema        string          `json:"schema,omitempty"`
	OriginalUrl   string          `json:"original_url,omitempty"`
	Phrase        pipeline.Phrase `gorm:"index" json:"phrase"`
	Status        TaskStatus      `gorm:"index" json:"status"`
	Message       string          `json:"-"`
	CreateTime    *time.Time      `gorm:"index" json:"created,omitempty"`
	UpdateTime    *time.Time      `gorm:"index" json:"updated,omitempty"`
	LastFetchTime *time.Time      `gorm:"index" json:"-"`
	LastCheckTime *time.Time      `gorm:"index" json:"-"`
	NextCheckTime *time.Time      `gorm:"index" json:"-"`

	// Details of the last snapshot taken for this task; excluded from JSON.
	SnapshotVersion int    `json:"-"`
	SnapshotID      string `json:"-"` // last snapshot's ID
	SnapshotHash    string `json:"-"` // last snapshot's hash
	SnapshotSimHash string `json:"-"` // last snapshot's simhash
}

func GetPendingFetchTasks

func GetPendingFetchTasks() (int, []Task, error)

func GetTask

func GetTask(id string) (Task, error)

func GetTaskByField

func GetTaskByField(k, v string) (Task, error)

func GetTaskList

func GetTaskList(from, size int, domain string) (int, []Task, error)

type TaskStatus

// TaskStatus is the integer status code carried by a task.
type TaskStatus int

// Status values visible in this listing. No constant with value 1 is
// declared here.
const (
	// TaskCreated is the zero value of TaskStatus.
	TaskCreated TaskStatus = 0
	// TaskFetchFailed has the value 2.
	TaskFetchFailed TaskStatus = 2
	// TaskFetchSuccess has the value 3.
	TaskFetchSuccess TaskStatus = 3
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL