crawly

package module
v0.0.0-...-cf7a88a Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 19, 2023 | License: MIT | Imports: 6 | Imported by: 6

README

🔦 crawly

An extensible library for developing asynchronous crawlers (or cron-like background services). Written in Go.

ℹ️ Note: at this stage, the ABI/API of this package is subject to change. Feel free to contribute.

Requires Go 1.21 (or newer).

Examples

TODO

  • documentation (ugh...)

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	InvalidTrackingCommand = errors.New("invalid tracking command")

	ExceededTrackingOrderTimeout = errors.New("exceeded tracking order timeout")
	ExceededTrackingTimeout      = errors.New("exceeded tracking timeout")
)
View Source
var DefaultCrawlerSettings = CrawlerSettings{
	TrackingOrderTimeout:         45 * time.Second,
	MinimumTrackingOrderDelay:    10 * time.Second,
	MaximumTrackingOrderAttempts: 3,

	TrackingTimeout:         45 * time.Second,
	MinimumTrackingDelay:    10 * time.Second,
	MaximumTrackingAttempts: 10,
}
View Source
var (
	InvalidHandle = errors.New("invalid handle")
)
View Source
var NilHandler = errors.New("handler is nil")

Functions

func SetCrawlerHandlers

func SetCrawlerHandlers(cr *Crawler, handlers CrawlerHandlers)

func SetCrawlerSettings

func SetCrawlerSettings(cr *Crawler, settings CrawlerSettings)

Types

type AnyCrawler

type AnyCrawler interface {
	Logger() *slog.Logger
	SetLogger(logger *slog.Logger)
	Log(ctx context.Context, params clog.Params)

	Tracked() (handles []Handle)
	IsTracked(handle Handle) bool
	Track(ctx context.Context, handle Handle) (tracked bool, err error)
	Untrack(ctx context.Context, handle Handle) (tracked bool, err error)
	UntrackAll(ctx context.Context) (untracked int, err error)

	Paused() bool
	Pause(ctx context.Context)
	Resume(ctx context.Context)
	Immediate(ctx context.Context, in time.Duration) (ok bool, err error)

	Active() bool
	Start(ctx context.Context, sessionSettings SessionSettings) error
	Stop(ctx context.Context) (ok bool, err error)
	Listen() csync.Listener[*Result]
}

type Crawler

type Crawler struct {
	// contains filtered or unexported fields
}

func (*Crawler) Active

func (cr *Crawler) Active() bool

func (*Crawler) Immediate

func (cr *Crawler) Immediate(ctx context.Context, in time.Duration) (ok bool, err error)

func (*Crawler) IsTracked

func (cr *Crawler) IsTracked(handle Handle) bool

func (*Crawler) Listen

func (cr *Crawler) Listen() csync.Listener[*Result]

func (*Crawler) Log

func (cr *Crawler) Log(ctx context.Context, params clog.Params)

func (*Crawler) Logger

func (cr *Crawler) Logger() *slog.Logger

func (*Crawler) Pause

func (cr *Crawler) Pause(ctx context.Context)

func (*Crawler) Paused

func (cr *Crawler) Paused() bool

func (*Crawler) Resume

func (cr *Crawler) Resume(ctx context.Context)

func (*Crawler) SetLogger

func (cr *Crawler) SetLogger(logger *slog.Logger)

func (*Crawler) Start

func (cr *Crawler) Start(ctx context.Context, sessionSettings SessionSettings) error

func (*Crawler) Stop

func (cr *Crawler) Stop(ctx context.Context) (ok bool, err error)

func (*Crawler) Track

func (cr *Crawler) Track(ctx context.Context, handle Handle) (tracked bool, err error)

func (*Crawler) Tracked

func (cr *Crawler) Tracked() (handles []Handle)

func (*Crawler) Untrack

func (cr *Crawler) Untrack(ctx context.Context, handle Handle) (tracked bool, err error)

func (*Crawler) UntrackAll

func (cr *Crawler) UntrackAll(ctx context.Context) (untracked int, err error)

type CrawlerHandlers

type CrawlerHandlers struct {
	Order  OrderHandler
	Entity EntityHandler
}

func LoadCrawlerHandlers

func LoadCrawlerHandlers(cr *Crawler) (handlers CrawlerHandlers)

type CrawlerSettings

type CrawlerSettings struct {
	TrackingOrderTimeout         time.Duration `json:"tracking_order_timeout"`
	MinimumTrackingOrderDelay    time.Duration `json:"minimum_tracking_order_delay"`
	MaximumTrackingOrderAttempts int           `json:"maximum_tracking_order_attempts"`

	TrackingTimeout         time.Duration `json:"tracking_timeout"`
	MinimumTrackingDelay    time.Duration `json:"minimum_tracking_delay"`
	MaximumTrackingAttempts int           `json:"maximum_tracking_attempts"`
}

func LoadCrawlerSettings

func LoadCrawlerSettings(cr *Crawler) (settings CrawlerSettings)

type Entity

type Entity struct {
	Attempt        int       `json:"attempt"`
	LastProcessing time.Time `json:"last_processing"`

	Handle Handle `json:"handle"`
	Data   any    `json:"data"`
}

type EntityHandler

type EntityHandler func(ctx context.Context, entity *Entity, result *TrackingResult) error

type Handle

type Handle interface {
	Equal(handle Handle) bool
	Valid() bool

	String() string
}

type Order

type Order struct {
	Command        TrackingCommand `json:"command"`
	Attempt        int             `json:"attempt"`
	LastProcessing time.Time       `json:"last_processing"`

	Handle Handle `json:"handle"`
	Data   any    `json:"data"`
}

type OrderHandler

type OrderHandler func(ctx context.Context, order *Order, result *TrackingResult) error

type Result

type Result struct {
	Err   error `json:"err"`
	Valid bool  `json:"valid"`
	Idle  bool  `json:"idle"`

	SessionID string    `json:"session_id"`
	Pass      uint64    `json:"pass"`
	Timestamp time.Time `json:"timestamp"`

	Orders   map[Handle]TrackingResult `json:"orders"`
	Entities map[Handle]TrackingResult `json:"entities"`
}

type SessionSettings

type SessionSettings csync.SessionSettings

type TrackingAction

type TrackingAction uint
const (
	TrackingActionNone TrackingAction = iota
	TrackingActionUpdate
	TrackingActionRemove
)

func (TrackingAction) String

func (act TrackingAction) String() string

type TrackingCommand

type TrackingCommand uint
const (
	TrackingCommandNone TrackingCommand = iota
	TrackingCommandStart
	TrackingCommandStop
)

func (TrackingCommand) String

func (cmd TrackingCommand) String() string

type TrackingResult

type TrackingResult struct {
	Order  actionableResult[Order]  `json:"order"`
	Entity actionableResult[Entity] `json:"entity"`
}

Directories

Path Synopsis
cclient module
clog module
csync module

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL