Documentation
¶
Index ¶
Constants ¶
View Source
// Crawl limits.
const (
	// MaxCrawlDepth is the crawl depth limit.
	// NOTE(review): presumably the number of link hops followed from the
	// starting page — confirm against the crawler implementation.
	MaxCrawlDepth = 5

	// MaxPagesToCrawl is the page-count limit.
	// NOTE(review): presumably applied per Crawl call — confirm.
	MaxPagesToCrawl = 100
)
View Source
// Messaging settings for the scraper consumer.
// NOTE(review): the names suggest NATS JetStream consumer options; how each
// value is applied is not visible here — confirm against the consumer setup.
const (
	// queue group
	QUEUE_GROUP = "scraper-service"

	// consumer config
	CONSUMER_NAME         = "scraper-consumer"
	ACK_WAIT              = 30 * time.Second
	MAX_DELIVERY_ATTEMPTS = 5
	MAX_ACK_PENDING       = 100
	FETCH_BATCH_SIZE      = 50
	MAX_FETCH_WAIT        = 500 * time.Millisecond
	ERR_BACKOFF           = 100 * time.Millisecond
)
Variables ¶
View Source
// Sentinel errors exposed by this package; compare with errors.Is.
// NOTE(review): which call sites return each error is not visible here.
var (
	// ErrPaymentRequired carries the message "jina balance requires topup".
	ErrPaymentRequired = errors.New("jina balance requires topup")

	// ErrUnprocessable carries the message "jina cannot process webpage".
	ErrUnprocessable = errors.New("jina cannot process webpage")

	// ErrUrlNotReachable carries the message "url is not reachable".
	ErrUrlNotReachable = errors.New("url is not reachable")
)
View Source
// SUBSCRIBED_SUBJECT holds the string form of enums.EventWebtrackerCreated.
// NOTE(review): presumably the subject this service subscribes to — confirm
// against the subscription setup.
var SUBSCRIBED_SUBJECT = enums.EventWebtrackerCreated.String()
Functions ¶
This section is empty.
Types ¶
type DocumentSection ¶
type ScraperService ¶ added in v0.0.42
// ScraperService is the scraper's service contract: the Crawl operation plus
// everything required by the embedded interfaces.NatsService.
type ScraperService interface {
	// Crawl takes a context and a domain and reports failure through the
	// returned error.
	// NOTE(review): presumably crawls pages under the given domain — confirm
	// against the implementation.
	Crawl(ctx context.Context, domain string) error

	// Embedded so that every ScraperService also satisfies NatsService.
	interfaces.NatsService
}
func NewScraperService ¶
// NewScraperService returns a ScraperService built from the given Jina
// configuration, NATS connections, database connections and repositories.
// NOTE(review): only the signature is visible here; how each dependency is
// used (and whether nil arguments are tolerated) must be confirmed in the
// implementation.
func NewScraperService( config *config.JinaConfig, natsConn *nats_internal.NATSConnections, leadsDB *database.DbConnections, repositories *repository.Repositories, ) ScraperService
Click to show internal directories.
Click to hide internal directories.