Documentation
¶
Index ¶
- Constants
- Variables
- func HeadUrl(client *http.Client, u string, retry int, delay time.Duration) error
- func MakeMetadataItemFieldMap(md *ItemMetadata) map[string]*[]string
- func NewClient() *http.Client
- type Backoff
- type Cache
- type File
- type ItemMetadata
- type ItemMetadata_Raw
- type ItemTopLevelMetadata
- type RequestStats
- type Role
- type Search
- type SearchItem
- type StringFields
Constants ¶
View Source
// MAX_RESULTS is an untyped integer constant with the value 5000.
// NOTE(review): presumably an upper bound on the number of search results
// (compare Search.MaxResults) — confirm against the implementation.
const MAX_RESULTS = 5000
Variables ¶
View Source
// DBBucketName is a package-level string that defaults to "ia".
// NOTE(review): presumably the bucket name used by the package's cache
// storage — confirm against the Cache implementation.
var DBBucketName = "ia"
View Source
// IASCRAPE_DEBUG is a package-level boolean that defaults to false.
// NOTE(review): presumably switches on debug output — confirm where it is read.
var IASCRAPE_DEBUG = false
View Source
// IASCRAPE_DEGUG_DEPTH is a package-level integer that defaults to 0.
// NOTE(review): the name looks like a misspelling of "DEBUG"; it is exported,
// so renaming it would break existing callers. Its exact meaning is not
// visible here — confirm where it is read.
var IASCRAPE_DEGUG_DEPTH = 0
View Source
// IA_ScrapeBaseURL is the base URL of the archive.org scrape search service.
// NOTE(review): the value ends in "?", which suggests query parameters are
// appended to it directly — confirm against the request-building code.
var IA_ScrapeBaseURL = "https://archive.org/services/search/v1/scrape?"
Reference: https://journal.code4lib.org/articles/18510. Internet Archive Search API (scrape): https://archive.org/help/aboutsearch.htm
View Source
// ItemBaseUrl is the base URL of the archive.org metadata service.
// Note that this value uses the plain "http" scheme.
// NOTE(review): presumably an item identifier is appended to form the
// request URL — confirm against the caller.
var ItemBaseUrl = "http://archive.org/metadata/"
var ItemBaseUrl = "https://archive.org/metadata/"
Functions ¶
func MakeMetadataItemFieldMap ¶
func MakeMetadataItemFieldMap(md *ItemMetadata) map[string]*[]string
Types ¶
type Backoff ¶
// Backoff is a function type that receives a *RequestStats and returns a
// time.Duration.
// NOTE(review): presumably the returned duration is the wait before the next
// request attempt — confirm against the retry loop.
type Backoff func(*RequestStats) time.Duration
type File ¶
// File is the element type of ItemTopLevelMetadata.Files, which is decoded
// from the "files" JSON key. Size and Length are kept as strings.
type File struct {
Format string `json:"format"`
MD5 string `json:"md5"`
Name string `json:"name"`
Size string `json:"size"`
Title string `json:"title"`
// Original is excluded from JSON encoding and decoding (tag "-").
// NOTE(review): presumably filled from Original_Raw after decoding — confirm.
Original []string `json:"-"`
// Original_Raw receives the "original" JSON value; it is typed interface{}
// so any JSON value can be decoded into it.
Original_Raw interface{} `json:"original"`
Length string `json:"length"`
TrackOrder int `json:"-"` // This is not part of the JSON
}
type ItemMetadata ¶
// ItemMetadata is the type of ItemTopLevelMetadata.Metadata, decoded from the
// "metadata" JSON key. It embeds ItemMetadata_Raw, so the raw interface{}
// fields declared there are decoded as part of this struct.
//
// Fields tagged `json:"-"` are skipped by encoding/json.
// NOTE(review): presumably each []string field is filled from the matching
// *_Raw field of ItemMetadata_Raw after decoding — confirm against the
// post-processing code.
type ItemMetadata struct {
ItemMetadata_Raw
AddedDate string `json:"addeddate"`
Collections []string `json:"-"`
Condition string `json:"condition"`
Contributor string `json:"contributor"`
Creators []string `json:"-"`
Dates []string `json:"-"`
Descriptions []string `json:"-"`
Genres []string `json:"-"`
Identifier string `json:"identifier"`
// Keywords_CommaSeparated holds the "keywords" JSON value as one string.
// NOTE(review): presumably Keywords is produced by splitting it — confirm.
Keywords_CommaSeparated string `json:"keywords"`
Keywords []string `json:"-"`
Languages []string `json:"-"`
// NOTE(review): SearchItem tags its MediaType field "mediatype" (no
// underscore); confirm which key the metadata response actually uses.
MediaType string `json:"media_type"`
Notes []string `json:"-"`
LicenseUrl string `json:"licenseurl"`
PublicDate string `json:"publicdate"`
Publishers []string `json:"-"`
PublisherCatalogNumbers []string `json:"-"`
Scanners []string `json:"-"`
Source []string `json:"-"`
Subjects []string `json:"-"`
Titles []string `json:"-"`
Uploaders []string `json:"-"`
Years []string `json:"-"`
// CanonicalYear has no JSON tag, so encoding/json uses the Go field name
// "CanonicalYear" for it.
CanonicalYear int
// contains filtered or unexported fields
}
type ItemMetadata_Raw ¶
// ItemMetadata_Raw groups metadata values that are decoded into interface{}
// fields, so each key accepts whatever JSON value is present. It is embedded
// in ItemMetadata.
// NOTE(review): presumably these keys may arrive either as a single value or
// as a list, which is why they are not typed — confirm against real responses.
type ItemMetadata_Raw struct {
CollectionCatalogNumber_Raw interface{} `json:"collection-catalog-number"`
Collection_Raw interface{} `json:"collection"`
Creator_Raw interface{} `json:"creator"`
Date_Raw interface{} `json:"date"`
Description_Raw interface{} `json:"description"`
Genre_Raw interface{} `json:"genre"`
Language_Raw interface{} `json:"language"`
Notes_Raw interface{} `json:"notes"`
PublisherCatalogNumber_Raw interface{} `json:"publisher-catalog-number"`
Publisher_Raw interface{} `json:"publisher"`
Scanner_Raw interface{} `json:"scanner"`
Source_Raw interface{} `json:"source"`
Subject_Raw interface{} `json:"subject"`
Title_Raw interface{} `json:"title"`
Uploader_Raw interface{} `json:"uploader"`
Year_Raw interface{} `json:"year"`
}
type ItemTopLevelMetadata ¶
// ItemTopLevelMetadata is the outermost JSON document for an item: it holds
// the file list ("files"), the descriptive metadata ("metadata") and several
// scalar attributes of the item.
// NOTE(review): presumably this is the response body of the metadata endpoint
// named by ItemBaseUrl — confirm against the fetching code.
type ItemTopLevelMetadata struct {
Created int64 `json:"created"`
D1 string `json:"d1"`
Date string `json:"date"`
Dir string `json:"dir"`
Files []File `json:"files"`
Files_Count int32 `json:"files_count"`
ItemLastUpdated int64 `json:"item_last_updated"`
ItemSize int64 `json:"item_size"`
Metadata ItemMetadata `json:"metadata"`
Roles Role `json:"roles"`
// Segments is excluded from JSON (tag "-"); Segments_Raw receives the
// "segments" value untyped.
// NOTE(review): presumably Segments is derived from Segments_Raw — confirm.
Segments []string `json:"-"`
Segments_Raw interface{} `json:"segments"`
Server string `json:"server"`
Workable_Servers []string `json:"workable_servers"`
Uniq int64 `json:"uniq"`
}
type RequestStats ¶
// RequestStats is the argument type passed to a Backoff function. It exposes
// no exported fields.
type RequestStats struct {
// contains filtered or unexported fields
}
type Search ¶
// Search holds the exported settings of a search; its Execute method returns
// the matching []SearchItem or an error.
// NOTE(review): the field meanings noted below are inferred from their names
// and types only — confirm against Execute.
type Search struct {
ChunkSize int
// Client is the *http.Client stored on the search (presumably used for its
// requests).
Client *http.Client
Limit int64
MaxResults int64
Offset int64
// Query is the search query string (presumably sent to the scrape service).
Query string
Retries int
Verbose bool
// contains filtered or unexported fields
}
func (*Search) Execute ¶
func (s *Search) Execute() ([]SearchItem, error)
type SearchItem ¶
// SearchItem is the element type of the slice returned by Search.Execute.
//
// Fields whose names end in _Raw are typed interface{} and tagged with a JSON
// key, so they accept whatever JSON value is present. Most are followed by an
// untagged, typed field with the same base name.
// NOTE(review): presumably each typed field is filled from its _Raw
// counterpart after decoding — confirm against the post-processing code.
//
// NOTE(review): unlike ItemMetadata, the typed companion fields here are not
// tagged `json:"-"`, so encoding/json treats them as regular fields under
// their Go names (for example, marshaling emits both "date" and "Date").
// Confirm whether that is intended.
type SearchItem struct {
AddedDate string `json:"addeddate"`
AvgRating_Raw interface{} `json:"avg_rating"`
AvgRating []int
BTIH string `json:"btih"`
BackupLocation_Raw interface{} `json:"backup_location"`
BackupLocation []string
Collection []string `json:"collection"`
CollectionsOrdered string `json:"collections_ordered"`
CurateDate string `json:"curatedate"`
CurateNote_Raw interface{} `json:"curatenote"`
CurateNote []string
CurateState string `json:"curatestate"`
Curation_Raw interface{} `json:"curation"`
Curation []string
Curator string `json:"curator"`
Date_Raw interface{} `json:"date"`
Date []string
// Description is typed interface{} like the _Raw fields, but has no _Raw
// suffix and no typed companion field.
Description interface{} `json:"description"`
Downloads int `json:"downloads"`
ExternalMetadataUpdate string `json:"external_metadata_update"`
FilesCount int `json:"files_count"`
Format_Raw interface{} `json:"format"`
Format []string
Identifier string `json:"identifier"`
IndexDate string `json:"indexdate"`
ItemSize int `json:"item_size"`
LicenseURL string `json:"licenseurl"`
ListMemberships_Raw interface{} `json:"list_memberships"`
ListMemberships []string
// NOTE(review): the field name reads "Aoustid" while the JSON key is
// "match_date_acoustid"; the name is exported, so renaming would break callers.
MatchDateAoustid string `json:"match_date_acoustid"`
MediaType string `json:"mediatype"`
Month int `json:"month"`
NoArchiveTorrent string `json:"noarchivetorrent"`
NumFavorites int `json:"num_favorites"`
OaiUpdateDate_Raw interface{} `json:"oai_updatedate"`
OaiUpdateDate []string
PrimaryCollection string `json:"primary_collection"`
PublicDate string `json:"publicdate"`
ReportedServer string `json:"reported_server"`
ReviewBody_Raw interface{} `json:"reviewbody"`
ReviewBody []string
ReviewData []string `json:"review_data"`
Reviewer_Raw interface{} `json:"reviewer"`
Reviewer []string
ReviewerItemName_Raw interface{} `json:"reviewer_itemname"`
ReviewerItemname []string
Scanner_Raw interface{} `json:"scanner"`
Scanner []string
Subject_Raw interface{} `json:"subject"`
Subject []string
SubjectCount int `json:"subject_count"`
Stars_Raw interface{} `json:"stars"`
Stars []int
Title_Raw interface{} `json:"title"`
Title []string
Week int `json:"week"`
Year_Raw interface{} `json:"year"`
Year []int
}
type StringFields ¶
// StringFields exposes no exported fields.
// NOTE(review): its purpose is not visible from this listing — confirm
// against the implementation.
type StringFields struct {
// contains filtered or unexported fields
}
Click to show internal directories.
Click to hide internal directories.