Documentation
Index
- Constants
- type Action
- type Client
- func (c *Client) CancelTask(crawlerID, taskID string) error
- func (c *Client) CrawlURLs(crawlerID string, URLs []string, save, saveSpecified bool) (string, error)
- func (c *Client) Create(name string, config Config) (string, error)
- func (c *Client) Get(crawlerID string, withConfig bool) (*Crawler, error)
- func (c *Client) List(itemsPerPage, page int, name, appID string) (*CrawlersResponse, error)
- func (c *Client) ListAll(name, appID string) ([]*CrawlerListItem, error)
- func (c *Client) Pause(crawlerID string) (string, error)
- func (c *Client) Reindex(crawlerID string) (string, error)
- func (c *Client) Run(crawlerID string) (string, error)
- func (c *Client) Stats(crawlerID string) (*StatsResponse, error)
- func (c *Client) Test(crawlerID, URL string, config *Config) (*TestResponse, error)
- type Config
- type Crawler
- type CrawlerListItem
- type CrawlersResponse
- type Err
- type ErrResponse
- type LabeledError
- type RecordExtractor
- type StatsResponse
- type TaskIDResponse
- type TestResponse
Constants
const (
    // DefaultBaseURL is the default base URL for the Algolia Crawler API.
    DefaultBaseURL = "https://crawler.algolia.com/api/1/"
)
Variables
This section is empty.
Functions
This section is empty.
Types
type Action
type Action struct {
    IndexName        string          `json:"indexName"`
    PathsToMatch     []string        `json:"pathsToMatch"`
    SelectorsToMatch []string        `json:"selectorsToMatch,omitempty"`
    FileTypesToMatch []string        `json:"fileTypesToMatch,omitempty"`
    RecordExtractor  RecordExtractor `json:"recordExtractor"`
}
Action is a Crawler configuration action.
type Client
Client provides methods to interact with the Algolia Crawler API.
func NewClientWithHTTPClient
NewClientWithHTTPClient returns a new Crawler API client with a custom HTTP client.
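A construction sketch, not from this package's documentation: neither the signature of NewClientWithHTTPClient nor the package's import path is shown on this page, so both are assumptions here. Later sketches on this page assume this import convention and a *crawler.Client named c.
package main

import (
    "net/http"
    "time"

    "github.com/algolia/cli/pkg/crawler" // assumed import path
)

func main() {
    // Assumed parameter list: Crawler user ID, Crawler API key, custom
    // *http.Client. Adjust to the actual signature.
    httpClient := &http.Client{Timeout: 30 * time.Second}
    c := crawler.NewClientWithHTTPClient("CRAWLER_USER_ID", "CRAWLER_API_KEY", httpClient)
    _ = c // used in the sketches below
}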
func (*Client) CancelTask
func (c *Client) CancelTask(crawlerID, taskID string) error
CancelTask cancels a blocking task.
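One way to put CancelTask to work: Get (documented below) exposes Blocked and BlockingTaskID on the Crawler type, so a sketch of unblocking a stuck Crawler might look like this.
func unblock(c *crawler.Client, crawlerID string) error {
    cr, err := c.Get(crawlerID, false) // status only, no config
    if err != nil {
        return err
    }
    if cr.Blocked && cr.BlockingTaskID != "" {
        // Cancel the task that is blocking the Crawler.
        return c.CancelTask(crawlerID, cr.BlockingTaskID)
    }
    return nil
}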
func (*Client) CrawlURLs
func (c *Client) CrawlURLs(crawlerID string, URLs []string, save, saveSpecified bool) (string, error)
CrawlURLs crawls the specified URLs on the specified Crawler. It returns the Task ID if successful.
func (*Client) Create
func (c *Client) Create(name string, config Config) (string, error)
Create creates a new Crawler with the given name and configuration. It returns the new Crawler's ID if successful.
func (*Client) Get
func (c *Client) Get(crawlerID string, withConfig bool) (*Crawler, error)
Get returns the Crawler with the given ID. When withConfig is true, the Crawler's configuration is included in the response.
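A sketch chaining Create and CrawlURLs, assuming the import convention from the constructor sketch above. The saveSpecified flag is read here as marking whether save was explicitly set, so that the field can be omitted from the request when it was not; that reading is an assumption.
func createAndCrawl(c *crawler.Client) (string, error) {
    id, err := c.Create("docs-crawler", crawler.Config{
        StartUrls: []string{"https://www.example.com/docs/"},
    })
    if err != nil {
        return "", err
    }
    // save=false with saveSpecified=false defers to the API's default
    // save behavior (assumed semantics of the flag pair).
    return c.CrawlURLs(id, []string{
        "https://www.example.com/docs/getting-started",
        "https://www.example.com/docs/faq",
    }, false, false)
}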
func (*Client) List
func (c *Client) List(itemsPerPage, page int, name, appID string) (*CrawlersResponse, error)
List lists Crawlers.
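A manual pagination sketch over List, using the Page, ItemsPerPage, and Total fields of CrawlersResponse (documented below). The 1-based page numbering and the empty name/appID strings meaning "no filter" are assumptions; ListAll presumably performs this loop for you.
func printAllCrawlers(c *crawler.Client) error {
    for page := 1; ; page++ { // page numbering assumed 1-based
        resp, err := c.List(20, page, "", "") // empty filters assumed to match everything
        if err != nil {
            return err
        }
        for _, item := range resp.Items {
            fmt.Printf("%+v\n", item)
        }
        if len(resp.Items) == 0 || page*resp.ItemsPerPage >= resp.Total {
            return nil
        }
    }
}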
func (*Client) ListAll
func (c *Client) ListAll(name, appID string) ([]*CrawlerListItem, error)
ListAll lists all Crawlers.
func (*Client) Pause
func (c *Client) Pause(crawlerID string) (string, error)
Pause pauses the specified Crawler. It returns the Task ID if successful.
func (*Client) Reindex
func (c *Client) Reindex(crawlerID string) (string, error)
Reindex triggers a full reindex on the specified Crawler. It returns the Task ID if successful.
func (*Client) Run
func (c *Client) Run(crawlerID string) (string, error)
Run starts the specified Crawler. It returns the Task ID if successful.
func (*Client) Stats
func (c *Client) Stats(crawlerID string) (*StatsResponse, error)
Stats returns URL statistics for the specified Crawler.
func (*Client) Test
func (c *Client) Test(crawlerID, URL string, config *Config) (*TestResponse, error)
Test tests the given URL against the specified Crawler's configuration. When config is non-nil, it is used in place of the stored configuration.
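Run, Pause, and Reindex above each return a string on success; this sketch treats it as a Task ID, by analogy with the TaskIDResponse type below. That reading, like the import convention, is an assumption.
func runThenPause(c *crawler.Client, crawlerID string) error {
    taskID, err := c.Run(crawlerID)
    if err != nil {
        return err
    }
    fmt.Println("run task:", taskID)
    // Pause the crawl, then queue a full reindex.
    if _, err := c.Pause(crawlerID); err != nil {
        return err
    }
    _, err = c.Reindex(crawlerID)
    return err
}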
type Config
type Config struct {
    AppID                string                      `json:"appId,omitempty"`
    APIKey               string                      `json:"apiKey,omitempty"`
    IndexPrefix          string                      `json:"indexPrefix,omitempty"`
    Schedule             string                      `json:"schedule,omitempty"`
    StartUrls            []string                    `json:"startUrls,omitempty"`
    Sitemaps             []string                    `json:"sitemaps,omitempty"`
    ExclusionPatterns    []string                    `json:"exclusionPatterns,omitempty"`
    IgnoreQueryParams    []string                    `json:"ignoreQueryParams,omitempty"`
    RenderJavaScript     bool                        `json:"renderJavaScript,omitempty"`
    RateLimit            int                         `json:"rateLimit,omitempty"`
    ExtraUrls            []string                    `json:"extraUrls,omitempty"`
    MaxDepth             int                         `json:"maxDepth,omitempty"`
    MaxURLs              int                         `json:"maxUrls,omitempty"`
    IgnoreRobotsTxtRules bool                        `json:"ignoreRobotsTxtRules,omitempty"`
    IgnoreNoIndex        bool                        `json:"ignoreNoIndex,omitempty"`
    IgnoreNoFollowTo     bool                        `json:"ignoreNoFollowTo,omitempty"`
    IgnoreCanonicalTo    bool                        `json:"ignoreCanonicalTo,omitempty"`
    SaveBackup           bool                        `json:"saveBackup,omitempty"`
    InitialIndexSettings map[string]*search.Settings `json:"initialIndexSettings,omitempty"`
    Actions              []*Action                   `json:"actions,omitempty"`
}
Config is a Crawler configuration.
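A sketch of a small configuration literal; every field value is illustrative only. RecordExtractor's JSON tag lacks omitempty, which suggests the Crawler requires it, but its fields are not shown on this page, so it is left zero-valued here.
var exampleConfig = crawler.Config{
    AppID:       "MY_APP_ID",
    APIKey:      "MY_WRITE_API_KEY",
    IndexPrefix: "crawler_",
    StartUrls:   []string{"https://www.example.com/docs/"},
    Sitemaps:    []string{"https://www.example.com/sitemap.xml"},
    MaxURLs:     5000,
    Actions: []*crawler.Action{{
        IndexName:    "docs",
        PathsToMatch: []string{"https://www.example.com/docs/**"},
        // RecordExtractor fields are not documented on this page;
        // zero-valued in this sketch.
    }},
}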
type Crawler
type Crawler struct {
    ID                   string    `json:"id,omitempty"`
    Name                 string    `json:"name"`
    Running              bool      `json:"running,omitempty"`
    Reindexing           bool      `json:"reindexing,omitempty"`
    Blocked              bool      `json:"blocked,omitempty"`
    BlockingTaskID       string    `json:"blockingTaskId,omitempty"`
    BlockingError        string    `json:"blockingError,omitempty"`
    CreatedAt            time.Time `json:"createdAt,omitempty"`
    UpdatedAt            time.Time `json:"updatedAt,omitempty"`
    LastReindexStartedAt time.Time `json:"lastReindexStartedAt,omitempty"`
    LastReindexEndedAt   time.Time `json:"lastReindexEndedAt,omitempty"`
    Config               *Config   `json:"config,omitempty"`
}
Crawler represents a single Crawler and its current status.
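Because the type exposes Running, Reindexing, and Blocked, one way to wait for a Crawler to go quiet is to poll Get; a sketch, with the polling interval and the idleness condition both chosen arbitrarily (requires the time package).
func waitUntilIdle(c *crawler.Client, crawlerID string) (*crawler.Crawler, error) {
    for {
        cr, err := c.Get(crawlerID, false)
        if err != nil {
            return nil, err
        }
        if !cr.Running && !cr.Reindexing && !cr.Blocked {
            return cr, nil
        }
        time.Sleep(10 * time.Second) // arbitrary poll interval
    }
}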
type CrawlerListItem
CrawlerListItem is a crawler list item.
type CrawlersResponse
type CrawlersResponse struct {
    Items []*CrawlerListItem `json:"items"`
    // Pagination
    Page         int `json:"page"`
    ItemsPerPage int `json:"itemsPerPage"`
    Total        int `json:"total"`
}
CrawlersResponse is the paginated response from the Crawler API's crawlers endpoint.
type Err
type Err struct {
    Message string         `json:"message"`
    Code    string         `json:"code"`
    Errors  []LabeledError `json:"errors,omitempty"`
}
Err is a Crawler API error.
type ErrResponse
type ErrResponse struct {
    Err Err `json:"error"`
}
ErrResponse is a Crawler API error response.
type LabeledError
type LabeledError struct {
    Type    string `json:"type"`
    Message string `json:"message"`
    Label   string `json:"label"`
}
LabeledError is a Crawler API labeled error.
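The error envelope decodes with encoding/json as the struct tags suggest. The payload below is hand-written for illustration, not captured from the API (requires encoding/json, fmt, and log).
func decodeErrExample() {
    payload := []byte(`{"error":{"code":"crawler_not_found","message":"Crawler not found","errors":[{"type":"validation","message":"unknown crawler id","label":"crawlerID"}]}}`)
    var resp crawler.ErrResponse
    if err := json.Unmarshal(payload, &resp); err != nil {
        log.Fatal(err)
    }
    fmt.Println(resp.Err.Code, resp.Err.Message)
    for _, le := range resp.Err.Errors {
        fmt.Printf("  [%s] %s: %s\n", le.Type, le.Label, le.Message)
    }
}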
type RecordExtractor
RecordExtractor is a Crawler configuration record extractor.
type StatsResponse
type StatsResponse struct {
    Count int `json:"count"`
    Data  []struct {
        Reason   string `json:"reason"`
        Status   string `json:"status"`
        Category string `json:"category"`
        Readable string `json:"readable"`
        Count    int    `json:"count"`
    } `json:"data"`
}
StatsResponse is the response from the Crawler API's crawlers/{id}/stats/urls endpoint.
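A sketch of consuming this response via Stats, assuming the usual c *crawler.Client; only documented fields are used.
func printStats(c *crawler.Client, crawlerID string) error {
    stats, err := c.Stats(crawlerID)
    if err != nil {
        return err
    }
    fmt.Println("URLs seen:", stats.Count)
    for _, d := range stats.Data {
        fmt.Printf("%-10s %-12s %6d  %s\n", d.Status, d.Category, d.Count, d.Readable)
    }
    return nil
}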
type TaskIDResponse
type TaskIDResponse struct {
    TaskID string `json:"taskId"`
}
TaskIDResponse is the response when a task is created.
type TestResponse
type TestResponse struct {
    StartDate    time.Time   `json:"startDate"`
    EndDate      time.Time   `json:"endDate"`
    Logs         interface{} `json:"logs,omitempty"`
    Records      interface{} `json:"records,omitempty"`
    Links        []string    `json:"links,omitempty"`
    ExternalData interface{} `json:"externalData,omitempty"`
    Error        *Err        `json:"error,omitempty"`
}
TestResponse is the response from the Crawler API's crawlers/{id}/test endpoint.
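A sketch of a test-crawl call via Test; passing nil for config is assumed to mean "test against the stored configuration".
func testOnePage(c *crawler.Client, crawlerID string) error {
    resp, err := c.Test(crawlerID, "https://www.example.com/docs/faq", nil)
    if err != nil {
        return err
    }
    if resp.Error != nil {
        return fmt.Errorf("test failed: %s (%s)", resp.Error.Message, resp.Error.Code)
    }
    fmt.Println("took:", resp.EndDate.Sub(resp.StartDate))
    fmt.Println("links found:", len(resp.Links))
    // Logs and Records are interface{}; re-marshal them to inspect their shape.
    return nil
}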