Documentation
¶
Index ¶
- Constants
- Variables
- func GetTempDir() string
- func NormalizeOrigin(url string) string
- func NormalizePathForFile(path string) string
- type CacheConfig
- func (c *CacheConfig) GetCachePath(baseURL, subDir, urlPath string) string
- func (c *CacheConfig) GetCacheRoot(baseURL string) string
- func (c *CacheConfig) SaveDataURI(baseURL, urlPath, dataURI string) (string, error)
- func (c *CacheConfig) SaveMetadata(baseURL, urlPath string, metadata []byte) error
- func (c *CacheConfig) SaveToCache(baseURL, subDir, urlPath string, content []byte) error
- type FetchResult
- type Fetcher
- func NewFetcherWithConfig(cfg FetcherConfig) (*Fetcher, error)
- func (f *Fetcher) Fetch(rawURL string) ([]byte, error)
- func (f *Fetcher) FetchWithStatus(rawURL string) (*FetchResult, error)
- func (f *Fetcher) FetchWithStatusHead(rawURL string) (*FetchResult, error)
- func (f *Fetcher) SetCookies(targetURL string, cookies []*http.Cookie) error
- func (f *Fetcher) SetUserAgent(ua string)
- type FetcherConfig
Constants ¶
const DefaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36"
DefaultUserAgent 默认 User-Agent(模拟 Chrome,不暴露工具标识)
const (
// MaxBodySize 最大响应体大小 100 MiB(100 * 1024 * 1024 字节)
MaxBodySize = 100 * 1024 * 1024
)
Variables ¶
var ErrBodyTooLarge = errors.New("response body exceeds max size limit")
ErrBodyTooLarge 响应体超过限制
Functions ¶
func NormalizeOrigin ¶
func NormalizeOrigin(url string) string
NormalizeOrigin 规范化 origin 部分(Windows 安全),例如: https://test.com:8080 → https_test.com_8080
func NormalizePathForFile ¶
func NormalizePathForFile(path string) string
NormalizePathForFile 规范化路径用于文件名(Windows 安全),例如: /aa/bb/static/js/app.js → aa_bb_static_js_app.js
Types ¶
type CacheConfig ¶
CacheConfig 缓存配置
func (*CacheConfig) GetCachePath ¶
func (c *CacheConfig) GetCachePath(baseURL, subDir, urlPath string) string
GetCachePath 获取缓存文件路径
- baseURL: https://test.com:8080/aa
- subDir: "js" 或 "source_map"
- urlPath: /static/js/app.js
- 返回: /tmp/ejfkdev/dj/https_test.com_8080_aa/js/static_js_app.js
func (*CacheConfig) GetCacheRoot ¶
func (c *CacheConfig) GetCacheRoot(baseURL string) string
GetCacheRoot 获取缓存根目录
- 输入: https://test.com:8080/aa
- 输出: /tmp/ejfkdev/dj/https_test.com_8080_aa
func (*CacheConfig) SaveDataURI ¶
func (c *CacheConfig) SaveDataURI(baseURL, urlPath, dataURI string) (string, error)
SaveDataURI 保存 data URI 内容到缓存
- dataURI: data:application/json;base64,... 或 data:application/json,...
- baseURL: 来源页面的 base URL
- urlPath: 来源 JS 的路径
- 返回: 保存的路径
func (*CacheConfig) SaveMetadata ¶
func (c *CacheConfig) SaveMetadata(baseURL, urlPath string, metadata []byte) error
SaveMetadata 保存元数据 JSON 到缓存;如果 urlPath 为空,则保存到根目录的 meta.json
func (*CacheConfig) SaveToCache ¶
func (c *CacheConfig) SaveToCache(baseURL, subDir, urlPath string, content []byte) error
SaveToCache 保存内容到缓存
type FetchResult ¶
type FetchResult struct {
Content []byte
StatusCode int
ContentType string
Headers http.Header // HTTP 响应头
}
FetchResult 包含响应内容、状态码、Content-Type 和 HTTP 响应头
type Fetcher ¶
type Fetcher struct {
// contains filtered or unexported fields
}
Fetcher HTTP 下载器
func NewFetcherWithConfig ¶
func NewFetcherWithConfig(cfg FetcherConfig) (*Fetcher, error)
NewFetcherWithConfig 创建下载器,支持 uTLS 指纹伪装、Cookie Jar 和代理
func (*Fetcher) FetchWithStatus ¶
func (f *Fetcher) FetchWithStatus(rawURL string) (*FetchResult, error)
FetchWithStatus 获取 URL 内容和状态码
func (*Fetcher) FetchWithStatusHead ¶
func (f *Fetcher) FetchWithStatusHead(rawURL string) (*FetchResult, error)
FetchWithStatusHead 使用 HEAD 请求探测 URL 是否存在
func (*Fetcher) SetCookies ¶ added in v0.3.0
func (f *Fetcher) SetCookies(targetURL string, cookies []*http.Cookie) error
SetCookies 向 Fetcher 的 Cookie Jar 中注入 cookie
func (*Fetcher) SetUserAgent ¶
func (f *Fetcher) SetUserAgent(ua string)
SetUserAgent 设置自定义 User-Agent
type FetcherConfig ¶ added in v0.3.0
type FetcherConfig struct {
Proxy string // 代理 URL (http/https/socks5)
UseUTLS bool // 启用 uTLS TLS 指纹伪装
}
FetcherConfig Fetcher 配置