fetcher

package
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 5, 2026 License: MPL-2.0 Imports: 22 Imported by: 0

Documentation

Index

Constants

View Source
// DefaultUserAgent is the default User-Agent string. It imitates a desktop
// Chrome browser so that requests do not reveal the tool's identity.
const DefaultUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36"

DefaultUserAgent 默认 User-Agent(模拟 Chrome,不暴露工具标识)

View Source
// MaxBodySize is the maximum response body size in bytes (100 MB).
const MaxBodySize = 100 << 20

Variables

View Source
// ErrBodyTooLarge reports that a response body exceeded the size limit.
// NOTE(review): presumably the limit is MaxBodySize — confirm against the fetch code.
var ErrBodyTooLarge = errors.New("response body exceeds max size limit")

ErrBodyTooLarge 响应体超过限制

Functions

func GetTempDir

func GetTempDir() string

GetTempDir 获取临时目录

func NormalizeOrigin

func NormalizeOrigin(url string) string

NormalizeOrigin 规范化 origin 部分(Windows 安全)。示例:https://test.com:8080 → https_test.com_8080

func NormalizePathForFile

func NormalizePathForFile(path string) string

NormalizePathForFile 规范化路径用于文件名(Windows 安全)。示例:/aa/bb/static/js/app.js → aa_bb_static_js_app.js

Types

type CacheConfig

// CacheConfig holds the cache configuration.
type CacheConfig struct {
	// NOTE(review): presumably switches caching on or off — confirm against callers.
	Enable  bool
	BaseDir string // cache root directory, e.g. /tmp/ejfkdev/dj
}

CacheConfig 缓存配置

func (*CacheConfig) GetCachePath

func (c *CacheConfig) GetCachePath(baseURL, subDir, urlPath string) string

GetCachePath 获取缓存文件路径。baseURL:例如 https://test.com:8080/aa。subDir:"js" 或 "source_map"。urlPath:例如 /static/js/app.js。返回:例如 /tmp/ejfkdev/dj/https_test.com_8080_aa/js/static_js_app.js

func (*CacheConfig) GetCacheRoot

func (c *CacheConfig) GetCacheRoot(baseURL string) string

GetCacheRoot 获取缓存根目录。输入示例:https://test.com:8080/aa;输出示例:/tmp/ejfkdev/dj/https_test.com_8080_aa

func (*CacheConfig) SaveDataURI

func (c *CacheConfig) SaveDataURI(baseURL, urlPath, dataURI string) (string, error)

SaveDataURI 保存 data URI 内容到缓存。dataURI:形如 data:application/json;base64,... 或 data:application/json,... 的内容。baseURL:来源页面的 base URL。urlPath:来源 JS 的路径。返回保存的路径。

func (*CacheConfig) SaveMetadata

func (c *CacheConfig) SaveMetadata(baseURL, urlPath string, metadata []byte) error

SaveMetadata 保存元数据 JSON 到缓存。如果 urlPath 为空,则保存到根目录的 meta.json。

func (*CacheConfig) SaveToCache

func (c *CacheConfig) SaveToCache(baseURL, subDir, urlPath string, content []byte) error

SaveToCache 保存内容到缓存

type FetchResult

// FetchResult bundles the fetched content with its status code, content type
// and response headers.
type FetchResult struct {
	// NOTE(review): presumably the raw response body bytes — confirm against FetchWithStatus.
	Content     []byte
	StatusCode  int
	ContentType string
	Headers     http.Header // HTTP response headers
}

FetchResult 包含响应内容、状态码、Content-Type 以及 HTTP 响应头

type Fetcher

type Fetcher struct {
	// contains filtered or unexported fields
}

Fetcher HTTP 下载器

func NewFetcher

func NewFetcher() *Fetcher

NewFetcher 创建下载器(使用默认配置)

func NewFetcherWithConfig

func NewFetcherWithConfig(cfg FetcherConfig) (*Fetcher, error)

NewFetcherWithConfig 创建下载器,支持 uTLS 指纹伪装、Cookie Jar 和代理

func (*Fetcher) Fetch

func (f *Fetcher) Fetch(rawURL string) ([]byte, error)

Fetch 获取 URL 内容

func (*Fetcher) FetchWithStatus

func (f *Fetcher) FetchWithStatus(rawURL string) (*FetchResult, error)

FetchWithStatus 获取 URL 内容和状态码

func (*Fetcher) FetchWithStatusHead

func (f *Fetcher) FetchWithStatusHead(rawURL string) (*FetchResult, error)

FetchWithStatusHead 使用 HEAD 请求探测 URL 是否存在

func (*Fetcher) SetCookies added in v0.3.0

func (f *Fetcher) SetCookies(targetURL string, cookies []*http.Cookie) error

SetCookies 向 Fetcher 的 Cookie Jar 中注入 cookie

func (*Fetcher) SetUserAgent

func (f *Fetcher) SetUserAgent(ua string)

SetUserAgent 设置自定义 User-Agent

type FetcherConfig added in v0.3.0

// FetcherConfig is the configuration for a Fetcher.
type FetcherConfig struct {
	Proxy   string // proxy URL (http/https/socks5)
	UseUTLS bool   // enable uTLS TLS fingerprint impersonation
}

FetcherConfig Fetcher 配置

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL